diff --git a/tools/llm-oneshot/.cursor/rules/patterns-typescript.mdc b/tools/llm-oneshot/.cursor/rules/patterns-typescript.mdc
index b2d7e3fb9bb..5cca6438161 100644
--- a/tools/llm-oneshot/.cursor/rules/patterns-typescript.mdc
+++ b/tools/llm-oneshot/.cursor/rules/patterns-typescript.mdc
@@ -60,7 +60,7 @@ spacetime publish chat-app-20260106-183045 --module-path backend/spacetimedb
   "type": "module",
   "version": "1.0.0",
   "dependencies": {
-    "spacetimedb": "^1.11.0"
+    "spacetimedb": "^2.0.0"
   }
 }
 ```
@@ -109,7 +109,7 @@ src/index.ts    → Import schema, define all reducers and lifecycle hooks
   "dependencies": {
     "react": "^18.3.1",
     "react-dom": "^18.3.1",
-    "spacetimedb": "^1.11.0"
+    "spacetimedb": "^2.0.0"
   },
   "devDependencies": {
     "@types/react": "^18.3.18",
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/01_basic.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/01_basic.md
index e280cb31317..62819fa3d52 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/01_basic.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/01_basic.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
+## UI & Style Guide
 
-Use SpacetimeDB brand styling (dark theme).
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,20 +49,44 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", or "Set Name", or a `button` with `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/02_scheduled.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/02_scheduled.md
index 07b087ecc24..432b7756171 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/02_scheduled.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/02_scheduled.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
+## UI & Style Guide
 
-Use SpacetimeDB brand styling (dark theme).
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,26 +49,56 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", or "Set Name", or a `button` with `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/03_realtime.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/03_realtime.md
index 8fea9f8955a..65e62dc0f02 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/03_realtime.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/03_realtime.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,32 +49,68 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", or "Set Name", or a `button` with `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/04_reactions.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/04_reactions.md
index 9befe3f3a9c..ee779f7a873 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/04_reactions.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/04_reactions.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,39 +49,81 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", or "Set Name", or a `button` with `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
 
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
 ### Message Reactions
 
 - Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
 - Show reaction counts on messages that update in real-time
 - Users can toggle their own reactions on/off
 - Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/05_edit_history.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/05_edit_history.md
index e8d28c49129..1075eb6ee04 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/05_edit_history.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/05_edit_history.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,36 +49,72 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: either clicking the room name joins/enters it, or there is a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
 
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
 ### Message Reactions
 
 - Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
@@ -55,9 +122,21 @@ Use SpacetimeDB brand styling (dark theme).
 - Users can toggle their own reactions on/off
 - Display who reacted when hovering over reaction counts
 
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
 ### Message Editing with History
 
 - Users can edit their own messages after sending
 - Show "(edited)" indicator on edited messages
 - Other users can view the edit history of a message
 - Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/06_permissions.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/06_permissions.md
index d58d62e9192..fadfb394f93 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/06_permissions.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/06_permissions.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,36 +49,72 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: either clicking the room name joins/enters it, or there is a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
 
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
 ### Message Reactions
 
 - Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
@@ -55,6 +122,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Users can toggle their own reactions on/off
 - Display who reacted when hovering over reaction counts
 
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
 ### Message Editing with History
 
 - Users can edit their own messages after sending
@@ -62,9 +135,22 @@ Use SpacetimeDB brand styling (dark theme).
 - Other users can view the edit history of a message
 - Edits sync in real-time to all viewers
 
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
 ### Real-Time Permissions
 
 - Room creators are admins and can kick/ban users from their rooms
 - Kicked users immediately lose access and stop receiving room updates
 - Admins can promote other users to admin
 - Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/07_presence.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/07_presence.md
index 7c0e6e794ff..3c314cf6f76 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/07_presence.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/07_presence.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,36 +49,72 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: either clicking the room name joins/enters it, or there is a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
 
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
 ### Message Reactions
 
 - Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
@@ -55,6 +122,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Users can toggle their own reactions on/off
 - Display who reacted when hovering over reaction counts
 
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
 ### Message Editing with History
 
 - Users can edit their own messages after sending
@@ -62,6 +135,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Other users can view the edit history of a message
 - Edits sync in real-time to all viewers
 
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
 ### Real-Time Permissions
 
 - Room creators are admins and can kick/ban users from their rooms
@@ -69,9 +148,21 @@ Use SpacetimeDB brand styling (dark theme).
 - Admins can promote other users to admin
 - Permission changes apply instantly without requiring reconnection
 
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
 ### Rich User Presence
 
 - Users can set their status: online, away, do-not-disturb, invisible
 - Show "Last active X minutes ago" for users who aren't online
 - Status changes sync to all viewers in real-time
 - Auto-set to "away" after period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/08_threading.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/08_threading.md
index 102bb8c3a7c..85253f6410a 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/08_threading.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/08_threading.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,36 +49,72 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: either clicking the room name joins/enters it, or there is a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
 
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
 ### Message Reactions
 
 - Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
@@ -55,6 +122,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Users can toggle their own reactions on/off
 - Display who reacted when hovering over reaction counts
 
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
 ### Message Editing with History
 
 - Users can edit their own messages after sending
@@ -62,6 +135,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Other users can view the edit history of a message
 - Edits sync in real-time to all viewers
 
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
 ### Real-Time Permissions
 
 - Room creators are admins and can kick/ban users from their rooms
@@ -69,6 +148,13 @@ Use SpacetimeDB brand styling (dark theme).
 - Admins can promote other users to admin
 - Permission changes apply instantly without requiring reconnection
 
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
 ### Rich User Presence
 
 - Users can set their status: online, away, do-not-disturb, invisible
@@ -76,9 +162,20 @@ Use SpacetimeDB brand styling (dark theme).
 - Status changes sync to all viewers in real-time
 - Auto-set to "away" after period of inactivity
 
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
 ### Message Threading
 
 - Users can reply to specific messages, creating a thread
 - Show reply count and preview on parent messages
 - Threaded view to see all replies to a message
 - New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/09_private_rooms.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/09_private_rooms.md
index 963ba85903d..cfef2296840 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/09_private_rooms.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/09_private_rooms.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline/invisible)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,36 +49,72 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing indicators must be scoped to the room — do not broadcast them to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
 
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
 ### Message Reactions
 
 - Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
@@ -55,6 +122,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Users can toggle their own reactions on/off
 - Display who reacted when hovering over reaction counts
 
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
 ### Message Editing with History
 
 - Users can edit their own messages after sending
@@ -62,6 +135,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Other users can view the edit history of a message
 - Edits sync in real-time to all viewers
 
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
 ### Real-Time Permissions
 
 - Room creators are admins and can kick/ban users from their rooms
@@ -69,6 +148,13 @@ Use SpacetimeDB brand styling (dark theme).
 - Admins can promote other users to admin
 - Permission changes apply instantly without requiring reconnection
 
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
 ### Rich User Presence
 
 - Users can set their status: online, away, do-not-disturb, invisible
@@ -76,6 +162,11 @@ Use SpacetimeDB brand styling (dark theme).
 - Status changes sync to all viewers in real-time
 - Auto-set to "away" after period of inactivity
 
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
 ### Message Threading
 
 - Users can reply to specific messages, creating a thread
@@ -83,6 +174,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Threaded view to see all replies to a message
 - New replies sync in real-time to thread viewers
 
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
 ### Private Rooms and Direct Messages
 
 - Users can create private/invite-only rooms that don't appear in the public room list
@@ -90,3 +187,10 @@ Use SpacetimeDB brand styling (dark theme).
 - Direct messages (DMs) between two users as a special type of private room
 - Invited users receive notifications and can accept/decline invitations
 - Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/10_activity.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/10_activity.md
index 61a58b0f146..c99ab77b95e 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/10_activity.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/10_activity.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline/invisible)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,36 +49,72 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing indicators must be scoped to the room — do not broadcast them to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
 
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
 ### Message Reactions
 
 - Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
@@ -55,6 +122,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Users can toggle their own reactions on/off
 - Display who reacted when hovering over reaction counts
 
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
 ### Message Editing with History
 
 - Users can edit their own messages after sending
@@ -62,6 +135,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Other users can view the edit history of a message
 - Edits sync in real-time to all viewers
 
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
 ### Real-Time Permissions
 
 - Room creators are admins and can kick/ban users from their rooms
@@ -69,6 +148,13 @@ Use SpacetimeDB brand styling (dark theme).
 - Admins can promote other users to admin
 - Permission changes apply instantly without requiring reconnection
 
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
 ### Rich User Presence
 
 - Users can set their status: online, away, do-not-disturb, invisible
@@ -76,6 +162,11 @@ Use SpacetimeDB brand styling (dark theme).
 - Status changes sync to all viewers in real-time
 - Auto-set to "away" after period of inactivity
 
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
 ### Message Threading
 
 - Users can reply to specific messages, creating a thread
@@ -83,6 +174,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Threaded view to see all replies to a message
 - New replies sync in real-time to thread viewers
 
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
 ### Private Rooms and Direct Messages
 
 - Users can create private/invite-only rooms that don't appear in the public room list
@@ -91,9 +188,21 @@ Use SpacetimeDB brand styling (dark theme).
 - Invited users receive notifications and can accept/decline invitations
 - Only members can see private room content and member lists
 
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
 ### Room Activity Indicators
 
 - Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
 - Display real-time message velocity or activity level per room
 - Activity indicators update live as conversation pace changes
 - Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/11_drafts.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/11_drafts.md
index 931cca6cf52..9e47c3402a5 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/11_drafts.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/11_drafts.md
@@ -2,14 +2,45 @@
 
 Create a **real-time chat app**.
 
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
 
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline/invisible)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
 
 ## Features
 
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
 ### Basic Chat Features
 
 - Users can set a display name
@@ -18,36 +49,72 @@ Use SpacetimeDB brand styling (dark theme).
 - Show who's online
 - Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
 
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
 ### Typing Indicators
 
-- Show when other users are currently typing in a room
+- Show when other users are currently typing in the SAME room (typing indicators must be scoped to the room — do not broadcast them to users in different rooms)
 - Typing indicator should automatically expire after a few seconds of inactivity
 - Display "User is typing..." or "Multiple users are typing..." in the UI
 
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
 ### Read Receipts
 
 - Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
 - Update read status in real-time as users view messages
 
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
 ### Unread Message Counts
 
 - Show unread message count badges on the room list
 - Track last-read position per user per room
 - Update counts in real-time as new messages arrive or are read
 
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
 ### Scheduled Messages
 
 - Users can compose a message and schedule it to send at a future time
 - Show pending scheduled messages to the author (with option to cancel)
 - Message appears in the room at the scheduled time
 
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
 ### Ephemeral/Disappearing Messages
 
 - Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
 - Show a countdown or indicator that the message will disappear
 - Message is permanently deleted from the database when time expires
 
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
 ### Message Reactions
 
 - Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
@@ -55,6 +122,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Users can toggle their own reactions on/off
 - Display who reacted when hovering over reaction counts
 
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
 ### Message Editing with History
 
 - Users can edit their own messages after sending
@@ -62,6 +135,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Other users can view the edit history of a message
 - Edits sync in real-time to all viewers
 
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
 ### Real-Time Permissions
 
 - Room creators are admins and can kick/ban users from their rooms
@@ -69,6 +148,13 @@ Use SpacetimeDB brand styling (dark theme).
 - Admins can promote other users to admin
 - Permission changes apply instantly without requiring reconnection
 
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
 ### Rich User Presence
 
 - Users can set their status: online, away, do-not-disturb, invisible
@@ -76,6 +162,11 @@ Use SpacetimeDB brand styling (dark theme).
 - Status changes sync to all viewers in real-time
 - Auto-set to "away" after period of inactivity
 
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
 ### Message Threading
 
 - Users can reply to specific messages, creating a thread
@@ -83,6 +174,12 @@ Use SpacetimeDB brand styling (dark theme).
 - Threaded view to see all replies to a message
 - New replies sync in real-time to thread viewers
 
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
 ### Private Rooms and Direct Messages
 
 - Users can create private/invite-only rooms that don't appear in the public room list
@@ -91,6 +188,13 @@ Use SpacetimeDB brand styling (dark theme).
 - Invited users receive notifications and can accept/decline invitations
 - Only members can see private room content and member lists
 
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
 ### Room Activity Indicators
 
 - Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
@@ -98,9 +202,20 @@ Use SpacetimeDB brand styling (dark theme).
 - Activity indicators update live as conversation pace changes
 - Help users quickly identify where active conversations are happening
 
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
 ### Draft Sync
 
 - Message drafts are saved and synced across user's devices in real-time
 - Users can resume typing where they left off on any device
 - Each room maintains its own draft per user
 - Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/12_anon_migration.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/12_anon_migration.md
new file mode 100644
index 00000000000..7550fc2abe2
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/12_anon_migration.md
@@ -0,0 +1,235 @@
+# Chat App - Anonymous to Registered Migration
+
+Create a **real-time chat app**.
+
+
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=invisible/offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
+
+## Features
+
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
+### Basic Chat Features
+
+- Users can set a display name
+- Users can create chat rooms and join/leave them
+- Users can send messages to rooms they've joined
+- Show who's online
+- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
+
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
+### Typing Indicators
+
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
+- Typing indicator should automatically expire after a few seconds of inactivity
+- Display "User is typing..." or "Multiple users are typing..." in the UI
+
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
+### Read Receipts
+
+- Track which users have seen which messages
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
+- Update read status in real-time as users view messages
+
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
+### Unread Message Counts
+
+- Show unread message count badges on the room list
+- Track last-read position per user per room
+- Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
+### Scheduled Messages
+
+- Users can compose a message and schedule it to send at a future time
+- Show pending scheduled messages to the author (with option to cancel)
+- Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
+### Ephemeral/Disappearing Messages
+
+- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
+- Show a countdown or indicator that the message will disappear
+- Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
+### Message Reactions
+
+- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
+- Show reaction counts on messages that update in real-time
+- Users can toggle their own reactions on/off
+- Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢) or a `button` with text "React" / aria-label containing "react" visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on each reaction element listing the display names of the users who reacted
+
+### Message Editing with History
+
+- Users can edit their own messages after sending
+- Show "(edited)" indicator on edited messages
+- Other users can view the edit history of a message
+- Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
+### Real-Time Permissions
+
+- Room creators are admins and can kick/ban users from their rooms
+- Kicked users immediately lose access and stop receiving room updates
+- Admins can promote other users to admin
+- Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
+### Rich User Presence
+
+- Users can set their status: online, away, do-not-disturb, invisible
+- Show "Last active X minutes ago" for users who aren't online
+- Status changes sync to all viewers in real-time
+- Auto-set to "away" after period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
+### Message Threading
+
+- Users can reply to specific messages, creating a thread
+- Show reply count and preview on parent messages
+- Threaded view to see all replies to a message
+- New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
+### Private Rooms and Direct Messages
+
+- Users can create private/invite-only rooms that don't appear in the public room list
+- Room creators can invite specific users by username
+- Direct messages (DMs) between two users as a special type of private room
+- Invited users receive notifications and can accept/decline invitations
+- Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
+### Room Activity Indicators
+
+- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
+- Display real-time message velocity or activity level per room
+- Activity indicators update live as conversation pace changes
+- Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
+### Draft Sync
+
+- Message drafts are saved and synced across a user's devices in real-time
+- Users can resume typing where they left off on any device
+- Each room maintains its own draft per user
+- Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
+
+### Anonymous to Registered Migration
+
+- Users can join rooms and send messages without creating an account
+- Anonymous users have a temporary identity that persists for their session
+- When an anonymous user registers, their identity and message history are preserved
+- Room memberships and all associated data transfer to the registered account
+
+**UI contract:**
+- Guest entry: `button` with text "Guest" or "Anonymous" or "Join as Guest", OR the app auto-assigns a name like "Guest-XXXXX" or "Anon-XXXXX"
+- Guest indicator: text "guest" or "anon" visible as a badge/label next to the anonymous user’s name
+- Register button: `button` with text "Register" or "Sign Up" visible to guest users
+- Registration form: `input` with `placeholder` containing "name" or "username" for choosing a display name
+- Migration: after registration, all previous messages show the new display name
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/12_full.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/12_full.md
deleted file mode 100644
index a8e387576d1..00000000000
--- a/tools/llm-oneshot/apps/chat-app/prompts/composed/12_full.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# Chat App - Full Features
-
-Create a **real-time chat app**.
-
-**See `language/*.md` for language-specific setup, architecture, and constraints.**
-
-## UI Requirements
-
-Use SpacetimeDB brand styling (dark theme).
-
-## Features
-
-### Basic Chat Features
-
-- Users can set a display name
-- Users can create chat rooms and join/leave them
-- Users can send messages to rooms they've joined
-- Show who's online
-- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
-
-### Typing Indicators
-
-- Show when other users are currently typing in a room
-- Typing indicator should automatically expire after a few seconds of inactivity
-- Display "User is typing..." or "Multiple users are typing..." in the UI
-
-### Read Receipts
-
-- Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
-- Update read status in real-time as users view messages
-
-### Unread Message Counts
-
-- Show unread message count badges on the room list
-- Track last-read position per user per room
-- Update counts in real-time as new messages arrive or are read
-
-### Scheduled Messages
-
-- Users can compose a message and schedule it to send at a future time
-- Show pending scheduled messages to the author (with option to cancel)
-- Message appears in the room at the scheduled time
-
-### Ephemeral/Disappearing Messages
-
-- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
-- Show a countdown or indicator that the message will disappear
-- Message is permanently deleted from the database when time expires
-
-### Message Reactions
-
-- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
-- Show reaction counts on messages that update in real-time
-- Users can toggle their own reactions on/off
-- Display who reacted when hovering over reaction counts
-
-### Message Editing with History
-
-- Users can edit their own messages after sending
-- Show "(edited)" indicator on edited messages
-- Other users can view the edit history of a message
-- Edits sync in real-time to all viewers
-
-### Real-Time Permissions
-
-- Room creators are admins and can kick/ban users from their rooms
-- Kicked users immediately lose access and stop receiving room updates
-- Admins can promote other users to admin
-- Permission changes apply instantly without requiring reconnection
-
-### Rich User Presence
-
-- Users can set their status: online, away, do-not-disturb, invisible
-- Show "Last active X minutes ago" for users who aren't online
-- Status changes sync to all viewers in real-time
-- Auto-set to "away" after period of inactivity
-
-### Message Threading
-
-- Users can reply to specific messages, creating a thread
-- Show reply count and preview on parent messages
-- Threaded view to see all replies to a message
-- New replies sync in real-time to thread viewers
-
-### Private Rooms and Direct Messages
-
-- Users can create private/invite-only rooms that don't appear in the public room list
-- Room creators can invite specific users by username
-- Direct messages (DMs) between two users as a special type of private room
-- Invited users receive notifications and can accept/decline invitations
-- Only members can see private room content and member lists
-
-### Room Activity Indicators
-
-- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
-- Display real-time message velocity or activity level per room
-- Activity indicators update live as conversation pace changes
-- Help users quickly identify where active conversations are happening
-
-### Draft Sync
-
-- Message drafts are saved and synced across user's devices in real-time
-- Users can resume typing where they left off on any device
-- Each room maintains its own draft per user
-- Drafts persist across sessions until sent or cleared
-
-### Anonymous to Registered Migration
-
-- Users can join rooms and send messages without creating an account
-- Anonymous users have a temporary identity that persists for their session
-- When an anonymous user registers, their identity and message history are preserved
-- Room memberships and all associated data transfer to the registered account
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/13_pinned.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/13_pinned.md
new file mode 100644
index 00000000000..7a54a239267
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/13_pinned.md
@@ -0,0 +1,249 @@
+# Chat App - Pinned Messages
+
+Create a **real-time chat app**.
+
+
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=invisible/offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
+
+## Features
+
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
+### Basic Chat Features
+
+- Users can set a display name
+- Users can create chat rooms and join/leave them
+- Users can send messages to rooms they've joined
+- Show who's online
+- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
+
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
+### Typing Indicators
+
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
+- Typing indicator should automatically expire after a few seconds of inactivity
+- Display "User is typing..." or "Multiple users are typing..." in the UI
+
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
+### Read Receipts
+
+- Track which users have seen which messages
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
+- Update read status in real-time as users view messages
+
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
+### Unread Message Counts
+
+- Show unread message count badges on the room list
+- Track last-read position per user per room
+- Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
+### Scheduled Messages
+
+- Users can compose a message and schedule it to send at a future time
+- Show pending scheduled messages to the author (with option to cancel)
+- Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
+### Ephemeral/Disappearing Messages
+
+- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
+- Show a countdown or indicator that the message will disappear
+- Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
+### Message Reactions
+
+- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
+- Show reaction counts on messages that update in real-time
+- Users can toggle their own reactions on/off
+- Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢) or a `button` with text "React" / aria-label containing "react" visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on each reaction element listing the display names of the users who reacted
+
+### Message Editing with History
+
+- Users can edit their own messages after sending
+- Show "(edited)" indicator on edited messages
+- Other users can view the edit history of a message
+- Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
+### Real-Time Permissions
+
+- Room creators are admins and can kick/ban users from their rooms
+- Kicked users immediately lose access and stop receiving room updates
+- Admins can promote other users to admin
+- Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
+### Rich User Presence
+
+- Users can set their status: online, away, do-not-disturb, invisible
+- Show "Last active X minutes ago" for users who aren't online
+- Status changes sync to all viewers in real-time
+- Auto-set to "away" after period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
+### Message Threading
+
+- Users can reply to specific messages, creating a thread
+- Show reply count and preview on parent messages
+- Threaded view to see all replies to a message
+- New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
+### Private Rooms and Direct Messages
+
+- Users can create private/invite-only rooms that don't appear in the public room list
+- Room creators can invite specific users by username
+- Direct messages (DMs) between two users as a special type of private room
+- Invited users receive notifications and can accept/decline invitations
+- Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
+### Room Activity Indicators
+
+- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
+- Display real-time message velocity or activity level per room
+- Activity indicators update live as conversation pace changes
+- Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
+### Draft Sync
+
+- Message drafts are saved and synced across a user's devices in real-time
+- Users can resume typing where they left off on any device
+- Each room maintains its own draft per user
+- Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
+
+### Anonymous to Registered Migration
+
+- Users can join rooms and send messages without creating an account
+- Anonymous users have a temporary identity that persists for their session
+- When an anonymous user registers, their identity and message history are preserved
+- Room memberships and all associated data transfer to the registered account
+
+**UI contract:**
+- Guest entry: `button` with text "Guest" or "Anonymous" or "Join as Guest", OR the app auto-assigns a name like "Guest-XXXXX" or "Anon-XXXXX"
+- Guest indicator: text "guest" or "anon" visible as a badge/label next to the anonymous user’s name
+- Register button: `button` with text "Register" or "Sign Up" visible to guest users
+- Registration form: `input` with `placeholder` containing "name" or "username" for choosing a display name
+- Migration: after registration, all previous messages show the new display name
+
+### Pinned Messages
+
+- Users can pin important messages in a room (admins and message authors can pin)
+- Pinned messages show a pin indicator in the message list
+- A "Pinned Messages" panel accessible from the room header shows all pins for that room
+- Users can unpin messages
+- Pin/unpin actions sync to all users in the room in real-time
+
+**UI contract:**
+- Pin button: `button` with text "Pin" or `aria-label` containing "pin" visible on message hover
+- Pinned indicator: text "pinned" or a pin icon (📌) visible on pinned messages
+- Pinned panel: `button` with text "Pinned" or "Pins" in the room header, opening a panel listing all pinned messages
+- Unpin: `button` with text "Unpin" on pinned messages (in the panel or on hover)
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/14_profiles.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/14_profiles.md
new file mode 100644
index 00000000000..d933bc8a9f2
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/14_profiles.md
@@ -0,0 +1,262 @@
+# Chat App - User Profiles
+
+Create a **real-time chat app**.
+
+
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
+
+## Features
+
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
+### Basic Chat Features
+
+- Users can set a display name
+- Users can create chat rooms and join/leave them
+- Users can send messages to rooms they've joined
+- Show who's online
+- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
+
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
+### Typing Indicators
+
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
+- Typing indicator should automatically expire after a few seconds of inactivity
+- Display "User is typing..." or "Multiple users are typing..." in the UI
+
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
+### Read Receipts
+
+- Track which users have seen which messages
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
+- Update read status in real-time as users view messages
+
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
+### Unread Message Counts
+
+- Show unread message count badges on the room list
+- Track last-read position per user per room
+- Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
+### Scheduled Messages
+
+- Users can compose a message and schedule it to send at a future time
+- Show pending scheduled messages to the author (with option to cancel)
+- Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
+### Ephemeral/Disappearing Messages
+
+- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
+- Show a countdown or indicator that the message will disappear
+- Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
+### Message Reactions
+
+- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
+- Show reaction counts on messages that update in real-time
+- Users can toggle their own reactions on/off
+- Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
+### Message Editing with History
+
+- Users can edit their own messages after sending
+- Show "(edited)" indicator on edited messages
+- Other users can view the edit history of a message
+- Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
+### Real-Time Permissions
+
+- Room creators are admins and can kick/ban users from their rooms
+- Kicked users immediately lose access and stop receiving room updates
+- Admins can promote other users to admin
+- Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
+### Rich User Presence
+
+- Users can set their status: online, away, do-not-disturb, invisible
+- Show "Last active X minutes ago" for users who aren't online
+- Status changes sync to all viewers in real-time
+- Auto-set to "away" after period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
+### Message Threading
+
+- Users can reply to specific messages, creating a thread
+- Show reply count and preview on parent messages
+- Threaded view to see all replies to a message
+- New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
+### Private Rooms and Direct Messages
+
+- Users can create private/invite-only rooms that don't appear in the public room list
+- Room creators can invite specific users by username
+- Direct messages (DMs) between two users as a special type of private room
+- Invited users receive notifications and can accept/decline invitations
+- Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
+### Room Activity Indicators
+
+- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
+- Display real-time message velocity or activity level per room
+- Activity indicators update live as conversation pace changes
+- Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
+### Draft Sync
+
+- Message drafts are saved and synced across a user's devices in real-time
+- Users can resume typing where they left off on any device
+- Each room maintains its own draft per user
+- Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
+
+### Anonymous to Registered Migration
+
+- Users can join rooms and send messages without creating an account
+- Anonymous users have a temporary identity that persists for their session
+- When an anonymous user registers, their identity and message history are preserved
+- Room memberships and all associated data transfer to the registered account
+
+**UI contract:**
+- Guest entry: `button` with text "Guest" or "Anonymous" or "Join as Guest", OR the app auto-assigns a name like "Guest-XXXXX" or "Anon-XXXXX"
+- Guest indicator: text "guest" or "anon" visible as a badge/label next to the anonymous user’s name
+- Register button: `button` with text "Register" or "Sign Up" visible to guest users
+- Registration form: `input` with `placeholder` containing "name" or "username" for choosing a display name
+- Migration: after registration, all previous messages show the new display name
+
+### Pinned Messages
+
+- Users can pin important messages in a channel (admins and message authors can pin)
+- Pinned messages show a pin indicator in the message list
+- A "Pinned Messages" panel accessible from the channel header shows all pins for that channel
+- Users can unpin messages
+- Pin/unpin actions sync to all users in the channel in real-time
+
+**UI contract:**
+- Pin button: `button` with text "Pin" or `aria-label` containing "pin" visible on message hover
+- Pinned indicator: text "pinned" or a pin icon (📌) visible on pinned messages
+- Pinned panel: `button` with text "Pinned" or "Pins" in the channel header, opening a panel listing all pinned messages
+- Unpin: `button` with text "Unpin" on pinned messages (in the panel or on hover)
+
+### User Profiles
+
+- Users can edit their profile: display name, bio/status message, and avatar URL
+- Clicking on a username anywhere in the app opens a profile card/popover showing their info
+- When a user updates their profile, the changes propagate everywhere in real-time — message attributions, member lists, online user lists, and DM headers all reflect the new name/avatar immediately
+- Profile changes are visible to all users across all channels without page refresh
+
+**UI contract:**
+- Profile edit: `button` with text "Edit Profile" or "Profile" or a settings/gear icon accessible from the sidebar
+- Bio input: `input` or `textarea` with `placeholder` containing "bio" or "status" (case-insensitive)
+- Profile card: clicking a username opens a popover/modal showing the user’s name, bio, and avatar
+- Name propagation: changing display name updates all message attributions in real-time
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/15_mentions.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/15_mentions.md
new file mode 100644
index 00000000000..2a164bd7801
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/15_mentions.md
@@ -0,0 +1,280 @@
+# Chat App - @Mentions and Notification Feed
+
+Create a **real-time chat app**.
+
+
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
+
+## Features
+
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
+### Basic Chat Features
+
+- Users can set a display name
+- Users can create chat rooms and join/leave them
+- Users can send messages to rooms they've joined
+- Show who's online
+- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
+
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
+### Typing Indicators
+
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
+- Typing indicator should automatically expire after a few seconds of inactivity
+- Display "User is typing..." or "Multiple users are typing..." in the UI
+
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
+### Read Receipts
+
+- Track which users have seen which messages
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
+- Update read status in real-time as users view messages
+
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
+### Unread Message Counts
+
+- Show unread message count badges on the room list
+- Track last-read position per user per room
+- Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
+### Scheduled Messages
+
+- Users can compose a message and schedule it to send at a future time
+- Show pending scheduled messages to the author (with option to cancel)
+- Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
+### Ephemeral/Disappearing Messages
+
+- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
+- Show a countdown or indicator that the message will disappear
+- Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
+### Message Reactions
+
+- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
+- Show reaction counts on messages that update in real-time
+- Users can toggle their own reactions on/off
+- Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢), or a `button` with text "React" or `aria-label` containing "react", visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
+### Message Editing with History
+
+- Users can edit their own messages after sending
+- Show "(edited)" indicator on edited messages
+- Other users can view the edit history of a message
+- Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
+### Real-Time Permissions
+
+- Room creators are admins and can kick/ban users from their rooms
+- Kicked users immediately lose access and stop receiving room updates
+- Admins can promote other users to admin
+- Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
+### Rich User Presence
+
+- Users can set their status: online, away, do-not-disturb, invisible
+- Show "Last active X minutes ago" for users who aren't online
+- Status changes sync to all viewers in real-time
+- Auto-set to "away" after period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
+### Message Threading
+
+- Users can reply to specific messages, creating a thread
+- Show reply count and preview on parent messages
+- Threaded view to see all replies to a message
+- New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
+### Private Rooms and Direct Messages
+
+- Users can create private/invite-only rooms that don't appear in the public room list
+- Room creators can invite specific users by username
+- Direct messages (DMs) between two users as a special type of private room
+- Invited users receive notifications and can accept/decline invitations
+- Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
+### Room Activity Indicators
+
+- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
+- Display real-time message velocity or activity level per room
+- Activity indicators update live as conversation pace changes
+- Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
+### Draft Sync
+
+- Message drafts are saved and synced across a user's devices in real-time
+- Users can resume typing where they left off on any device
+- Each room maintains its own draft per user
+- Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
+
+### Anonymous to Registered Migration
+
+- Users can join rooms and send messages without creating an account
+- Anonymous users have a temporary identity that persists for their session
+- When an anonymous user registers, their identity and message history are preserved
+- Room memberships and all associated data transfer to the registered account
+
+**UI contract:**
+- Guest entry: `button` with text "Guest" or "Anonymous" or "Join as Guest", OR the app auto-assigns a name like "Guest-XXXXX" or "Anon-XXXXX"
+- Guest indicator: text "guest" or "anon" visible as a badge/label next to the anonymous user’s name
+- Register button: `button` with text "Register" or "Sign Up" visible to guest users
+- Registration form: `input` with `placeholder` containing "name" or "username" for choosing a display name
+- Migration: after registration, all previous messages show the new display name
+
+### Pinned Messages
+
+- Users can pin important messages in a channel (admins and message authors can pin)
+- Pinned messages show a pin indicator in the message list
+- A "Pinned Messages" panel accessible from the channel header shows all pins for that channel
+- Users can unpin messages
+- Pin/unpin actions sync to all users in the channel in real-time
+
+**UI contract:**
+- Pin button: `button` with text "Pin" or `aria-label` containing "pin" visible on message hover
+- Pinned indicator: text "pinned" or a pin icon (📌) visible on pinned messages
+- Pinned panel: `button` with text "Pinned" or "Pins" in the channel header, opening a panel listing all pinned messages
+- Unpin: `button` with text "Unpin" on pinned messages (in the panel or on hover)
+
+### User Profiles
+
+- Users can edit their profile: display name, bio/status message, and avatar URL
+- Clicking on a username anywhere in the app opens a profile card/popover showing their info
+- When a user updates their profile, the changes propagate everywhere in real-time — message attributions, member lists, online user lists, and DM headers all reflect the new name/avatar immediately
+- Profile changes are visible to all users across all channels without page refresh
+
+**UI contract:**
+- Profile edit: `button` with text "Edit Profile" or "Profile" or a settings/gear icon accessible from the sidebar
+- Bio input: `input` or `textarea` with `placeholder` containing "bio" or "status" (case-insensitive)
+- Profile card: clicking a username opens a popover/modal showing the user’s name, bio, and avatar
+- Name propagation: changing display name updates all message attributions in real-time
+
+### @Mentions and Notification Feed
+
+- Users can @mention other users in messages by typing `@username`
+- Mentioned usernames are highlighted/styled in the message text
+- When a user is mentioned, a notification is created for them
+- Notification bell icon in the sidebar/header shows unread notification count
+- Clicking the bell opens a notification panel listing all notifications (mentions, invites, etc.) with the source message and channel
+- Users can mark individual notifications as read, or mark all as read
+- Notifications update in real-time — new mentions appear instantly in the bell count
+- Clicking a notification navigates to the source message in its channel
+
+**UI contract:**
+- Mention highlighting: `@username` text in messages is visually distinct (bold, colored, or wrapped in a styled `span`)
+- Notification bell: `button` with text "🔔" or `aria-label` containing "notification", visible in the sidebar or header
+- Unread count: a numeric badge near the bell showing unread notification count
+- Notification panel: clicking the bell shows a list of notifications with message text and channel name
+- Mark read: `button` with text "Mark Read" or "Mark All Read" in the notification panel
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/16_bookmarks.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/16_bookmarks.md
new file mode 100644
index 00000000000..371c51918df
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/16_bookmarks.md
@@ -0,0 +1,295 @@
+# Chat App - Bookmarked Messages
+
+Create a **real-time chat app**.
+
+
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
+
+## Features
+
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
+### Basic Chat Features
+
+- Users can set a display name
+- Users can create chat rooms and join/leave them
+- Users can send messages to rooms they've joined
+- Show who's online
+- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
+
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
+### Typing Indicators
+
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
+- Typing indicator should automatically expire after a few seconds of inactivity
+- Display "User is typing..." or "Multiple users are typing..." in the UI
+
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
+### Read Receipts
+
+- Track which users have seen which messages
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
+- Update read status in real-time as users view messages
+
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
+### Unread Message Counts
+
+- Show unread message count badges on the room list
+- Track last-read position per user per room
+- Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
+### Scheduled Messages
+
+- Users can compose a message and schedule it to send at a future time
+- Show pending scheduled messages to the author (with option to cancel)
+- Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
+### Ephemeral/Disappearing Messages
+
+- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
+- Show a countdown or indicator that the message will disappear
+- Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
+### Message Reactions
+
+- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
+- Show reaction counts on messages that update in real-time
+- Users can toggle their own reactions on/off
+- Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢) or a `button` with text "React" or `aria-label` containing "react" visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
+### Message Editing with History
+
+- Users can edit their own messages after sending
+- Show "(edited)" indicator on edited messages
+- Other users can view the edit history of a message
+- Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
+### Real-Time Permissions
+
+- Room creators are admins and can kick/ban users from their rooms
+- Kicked users immediately lose access and stop receiving room updates
+- Admins can promote other users to admin
+- Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
+### Rich User Presence
+
+- Users can set their status: online, away, do-not-disturb, invisible
+- Show "Last active X minutes ago" for users who aren't online
+- Status changes sync to all viewers in real-time
+- Auto-set to "away" after a period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
+### Message Threading
+
+- Users can reply to specific messages, creating a thread
+- Show reply count and preview on parent messages
+- Threaded view to see all replies to a message
+- New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
+### Private Rooms and Direct Messages
+
+- Users can create private/invite-only rooms that don't appear in the public room list
+- Room creators can invite specific users by username
+- Direct messages (DMs) between two users as a special type of private room
+- Invited users receive notifications and can accept/decline invitations
+- Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
+### Room Activity Indicators
+
+- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
+- Display real-time message velocity or activity level per room
+- Activity indicators update live as conversation pace changes
+- Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
+### Draft Sync
+
+- Message drafts are saved and synced across a user's devices in real-time
+- Users can resume typing where they left off on any device
+- Each room maintains its own draft per user
+- Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
+
+### Anonymous to Registered Migration
+
+- Users can join rooms and send messages without creating an account
+- Anonymous users have a temporary identity that persists for their session
+- When an anonymous user registers, their identity and message history are preserved
+- Room memberships and all associated data transfer to the registered account
+
+**UI contract:**
+- Guest entry: `button` with text "Guest" or "Anonymous" or "Join as Guest", OR the app auto-assigns a name like "Guest-XXXXX" or "Anon-XXXXX"
+- Guest indicator: text "guest" or "anon" visible as a badge/label next to the anonymous user’s name
+- Register button: `button` with text "Register" or "Sign Up" visible to guest users
+- Registration form: `input` with `placeholder` containing "name" or "username" for choosing a display name
+- Migration: after registration, all previous messages show the new display name
+
+### Pinned Messages
+
+- Users can pin important messages in a channel (admins and message authors can pin)
+- Pinned messages show a pin indicator in the message list
+- A "Pinned Messages" panel accessible from the channel header shows all pins for that channel
+- Users can unpin messages
+- Pin/unpin actions sync to all users in the channel in real-time
+
+**UI contract:**
+- Pin button: `button` with text "Pin" or `aria-label` containing "pin" visible on message hover
+- Pinned indicator: text "pinned" or a pin icon (📌) visible on pinned messages
+- Pinned panel: `button` with text "Pinned" or "Pins" in the channel header, opening a panel listing all pinned messages
+- Unpin: `button` with text "Unpin" on pinned messages (in the panel or on hover)
+
+### User Profiles
+
+- Users can edit their profile: display name, bio/status message, and avatar URL
+- Clicking on a username anywhere in the app opens a profile card/popover showing their info
+- When a user updates their profile, the changes propagate everywhere in real-time — message attributions, member lists, online user lists, and DM headers all reflect the new name/avatar immediately
+- Profile changes are visible to all users across all channels without page refresh
+
+**UI contract:**
+- Profile edit: `button` with text "Edit Profile" or "Profile" or a settings/gear icon accessible from the sidebar
+- Bio input: `input` or `textarea` with `placeholder` containing "bio" or "status" (case-insensitive)
+- Profile card: clicking a username opens a popover/modal showing the user’s name, bio, and avatar
+- Name propagation: changing display name updates all message attributions in real-time
+
+### @Mentions and Notification Feed
+
+- Users can @mention other users in messages by typing `@username`
+- Mentioned usernames are highlighted/styled in the message text
+- When a user is mentioned, a notification is created for them
+- Notification bell icon in the sidebar/header shows unread notification count
+- Clicking the bell opens a notification panel listing all notifications (mentions, invites, etc.) with the source message and channel
+- Users can mark individual notifications as read, or mark all as read
+- Notifications update in real-time — new mentions appear instantly in the bell count
+- Clicking a notification navigates to the source message in its channel
+
+**UI contract:**
+- Mention highlighting: `@username` text in messages is visually distinct (bold, colored, or wrapped in a styled `span`)
+- Notification bell: `button` with text "🔔" or `aria-label` containing "notification" visible in the sidebar or header
+- Unread count: a numeric badge near the bell showing unread notification count
+- Notification panel: clicking the bell shows a list of notifications with message text and channel name
+- Mark read: `button` with text "Mark Read" or "Mark All Read" in the notification panel
+
+### Bookmarked/Saved Messages
+
+- Users can bookmark any message for personal reference (bookmark icon on hover)
+- A "Saved Messages" panel in the sidebar shows all bookmarked messages across all channels
+- Each bookmark shows the message content, sender, channel name, and timestamp
+- Users can remove bookmarks
+- Bookmarks are personal — only visible to the user who saved them
+- Bookmark list updates in real-time (e.g., if a bookmarked message is edited, the bookmark reflects the change)
+
+**UI contract:**
+- Bookmark button: `button` with text "Bookmark" or "Save" or `aria-label` containing "bookmark" or "save" visible on message hover
+- Saved panel: `button` with text "Saved" or "Bookmarks" in the sidebar, opening a panel
+- Bookmark entry: each saved message shows the message text and the channel/sender it came from
+- Remove: `button` with text "Remove" or "Unsave" next to bookmarked messages in the panel
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/17_forwarding.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/17_forwarding.md
new file mode 100644
index 00000000000..75cc61c0efd
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/17_forwarding.md
@@ -0,0 +1,309 @@
+# Chat App - Message Forwarding
+
+Create a **real-time chat app**.
+
+
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
+
+## Features
+
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
+### Basic Chat Features
+
+- Users can set a display name
+- Users can create chat rooms and join/leave them
+- Users can send messages to rooms they've joined
+- Show who's online
+- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
+
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
+### Typing Indicators
+
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
+- Typing indicator should automatically expire after a few seconds of inactivity
+- Display "User is typing..." or "Multiple users are typing..." in the UI
+
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
+### Read Receipts
+
+- Track which users have seen which messages
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
+- Update read status in real-time as users view messages
+
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
+### Unread Message Counts
+
+- Show unread message count badges on the room list
+- Track last-read position per user per room
+- Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
+### Scheduled Messages
+
+- Users can compose a message and schedule it to send at a future time
+- Show pending scheduled messages to the author (with option to cancel)
+- Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
+### Ephemeral/Disappearing Messages
+
+- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
+- Show a countdown or indicator that the message will disappear
+- Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
+### Message Reactions
+
+- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
+- Show reaction counts on messages that update in real-time
+- Users can toggle their own reactions on/off
+- Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢) or a `button` with text "React" or `aria-label` containing "react" visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
+### Message Editing with History
+
+- Users can edit their own messages after sending
+- Show "(edited)" indicator on edited messages
+- Other users can view the edit history of a message
+- Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
+### Real-Time Permissions
+
+- Room creators are admins and can kick/ban users from their rooms
+- Kicked users immediately lose access and stop receiving room updates
+- Admins can promote other users to admin
+- Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
+### Rich User Presence
+
+- Users can set their status: online, away, do-not-disturb, invisible
+- Show "Last active X minutes ago" for users who aren't online
+- Status changes sync to all viewers in real-time
+- Auto-set to "away" after a period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
+### Message Threading
+
+- Users can reply to specific messages, creating a thread
+- Show reply count and preview on parent messages
+- Threaded view to see all replies to a message
+- New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
+### Private Rooms and Direct Messages
+
+- Users can create private/invite-only rooms that don't appear in the public room list
+- Room creators can invite specific users by username
+- Direct messages (DMs) between two users as a special type of private room
+- Invited users receive notifications and can accept/decline invitations
+- Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
+### Room Activity Indicators
+
+- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
+- Display real-time message velocity or activity level per room
+- Activity indicators update live as conversation pace changes
+- Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
+### Draft Sync
+
+- Message drafts are saved and synced across a user's devices in real-time
+- Users can resume typing where they left off on any device
+- Each room maintains its own draft per user
+- Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
+
+### Anonymous to Registered Migration
+
+- Users can join rooms and send messages without creating an account
+- Anonymous users have a temporary identity that persists for their session
+- When an anonymous user registers, their identity and message history are preserved
+- Room memberships and all associated data transfer to the registered account
+
+**UI contract:**
+- Guest entry: `button` with text "Guest" or "Anonymous" or "Join as Guest", OR the app auto-assigns a name like "Guest-XXXXX" or "Anon-XXXXX"
+- Guest indicator: text "guest" or "anon" visible as a badge/label next to the anonymous user’s name
+- Register button: `button` with text "Register" or "Sign Up" visible to guest users
+- Registration form: `input` with `placeholder` containing "name" or "username" for choosing a display name
+- Migration: after registration, all previous messages show the new display name
+
+### Pinned Messages
+
+- Users can pin important messages in a channel (admins and message authors can pin)
+- Pinned messages show a pin indicator in the message list
+- A "Pinned Messages" panel accessible from the channel header shows all pins for that channel
+- Users can unpin messages
+- Pin/unpin actions sync to all users in the channel in real-time
+
+**UI contract:**
+- Pin button: `button` with text "Pin" or `aria-label` containing "pin" visible on message hover
+- Pinned indicator: text "pinned" or a pin icon (📌) visible on pinned messages
+- Pinned panel: `button` with text "Pinned" or "Pins" in the channel header, opening a panel listing all pinned messages
+- Unpin: `button` with text "Unpin" on pinned messages (in the panel or on hover)
+
+### User Profiles
+
+- Users can edit their profile: display name, bio/status message, and avatar URL
+- Clicking on a username anywhere in the app opens a profile card/popover showing their info
+- When a user updates their profile, the changes propagate everywhere in real-time — message attributions, member lists, online user lists, and DM headers all reflect the new name/avatar immediately
+- Profile changes are visible to all users across all channels without page refresh
+
+**UI contract:**
+- Profile edit: `button` with text "Edit Profile" or "Profile" or a settings/gear icon accessible from the sidebar
+- Bio input: `input` or `textarea` with `placeholder` containing "bio" or "status" (case-insensitive)
+- Profile card: clicking a username opens a popover/modal showing the user’s name, bio, and avatar
+- Name propagation: changing display name updates all message attributions in real-time
+
+### @Mentions and Notification Feed
+
+- Users can @mention other users in messages by typing `@username`
+- Mentioned usernames are highlighted/styled in the message text
+- When a user is mentioned, a notification is created for them
+- Notification bell icon in the sidebar/header shows unread notification count
+- Clicking the bell opens a notification panel listing all notifications (mentions, invites, etc.) with the source message and channel
+- Users can mark individual notifications as read, or mark all as read
+- Notifications update in real-time — new mentions appear instantly in the bell count
+- Clicking a notification navigates to the source message in its channel
+
+**UI contract:**
+- Mention highlighting: `@username` text in messages is visually distinct (bold, colored, or wrapped in a styled `span`)
+- Notification bell: `button` with text "🔔" or `aria-label` containing "notification" visible in the sidebar or header
+- Unread count: a numeric badge near the bell showing unread notification count
+- Notification panel: clicking the bell shows a list of notifications with message text and channel name
+- Mark read: `button` with text "Mark Read" or "Mark All Read" in the notification panel
+
+### Bookmarked/Saved Messages
+
+- Users can bookmark any message for personal reference (bookmark icon on hover)
+- A "Saved Messages" panel in the sidebar shows all bookmarked messages across all channels
+- Each bookmark shows the message content, sender, channel name, and timestamp
+- Users can remove bookmarks
+- Bookmarks are personal — only visible to the user who saved them
+- Bookmark list updates in real-time (e.g., if a bookmarked message is edited, the bookmark reflects the change)
+
+**UI contract:**
+- Bookmark button: `button` with text "Bookmark" or "Save" or `aria-label` containing "bookmark" or "save" visible on message hover
+- Saved panel: `button` with text "Saved" or "Bookmarks" in the sidebar, opening a panel
+- Bookmark entry: each saved message shows the message text and the channel/sender it came from
+- Remove: `button` with text "Remove" or "Unsave" next to bookmarked messages in the panel
+
+### Message Forwarding
+
+- Users can forward a message to another channel they're a member of
+- A "Forward" button appears on message hover, opening a channel picker
+- The forwarded message appears in the target channel with a "Forwarded from #original-channel by @user" attribution
+- The original message is not modified — forwarding creates a copy
+- Forwarded messages appear in real-time for all members of the target channel
+
+**UI contract:**
+- Forward button: `button` with text "Forward" or `aria-label` containing "forward" visible on message hover
+- Channel picker: a list or dropdown showing channel names the user can forward to
+- Attribution: forwarded messages display text containing "Forwarded" or "forwarded from"
+- Original unchanged: the source message has no "forwarded" indicator
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/18_slowmode.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/18_slowmode.md
new file mode 100644
index 00000000000..c06f5ee50d6
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/18_slowmode.md
@@ -0,0 +1,326 @@
+# Chat App - Full Features (21)
+
+Create a **real-time chat app**.
+
+
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
+
+## Features
+
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
+### Basic Chat Features
+
+- Users can set a display name
+- Users can create chat rooms and join/leave them
+- Users can send messages to rooms they've joined
+- Show who's online
+- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
+
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
+### Typing Indicators
+
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
+- Typing indicator should automatically expire after a few seconds of inactivity
+- Display "User is typing..." or "Multiple users are typing..." in the UI
+
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
+### Read Receipts
+
+- Track which users have seen which messages
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
+- Update read status in real-time as users view messages
+
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
+### Unread Message Counts
+
+- Show unread message count badges on the room list
+- Track last-read position per user per room
+- Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
+### Scheduled Messages
+
+- Users can compose a message and schedule it to send at a future time
+- Show pending scheduled messages to the author (with option to cancel)
+- Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
+### Ephemeral/Disappearing Messages
+
+- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
+- Show a countdown or indicator that the message will disappear
+- Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
+### Message Reactions
+
+- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
+- Show reaction counts on messages that update in real-time
+- Users can toggle their own reactions on/off
+- Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢) or a `button` with text "React" / `aria-label` containing "react" visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
+### Message Editing with History
+
+- Users can edit their own messages after sending
+- Show "(edited)" indicator on edited messages
+- Other users can view the edit history of a message
+- Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
+### Real-Time Permissions
+
+- Room creators are admins and can kick/ban users from their rooms
+- Kicked users immediately lose access and stop receiving room updates
+- Admins can promote other users to admin
+- Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
+### Rich User Presence
+
+- Users can set their status: online, away, do-not-disturb, invisible
+- Show "Last active X minutes ago" for users who aren't online
+- Status changes sync to all viewers in real-time
+- Auto-set to "away" after a period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
+### Message Threading
+
+- Users can reply to specific messages, creating a thread
+- Show reply count and preview on parent messages
+- Threaded view to see all replies to a message
+- New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
+### Private Rooms and Direct Messages
+
+- Users can create private/invite-only rooms that don't appear in the public room list
+- Room creators can invite specific users by username
+- Direct messages (DMs) between two users as a special type of private room
+- Invited users receive notifications and can accept/decline invitations
+- Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
+### Room Activity Indicators
+
+- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
+- Display real-time message velocity or activity level per room
+- Activity indicators update live as conversation pace changes
+- Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
+### Draft Sync
+
+- Message drafts are saved and synced across a user's devices in real-time
+- Users can resume typing where they left off on any device
+- Each room maintains its own draft per user
+- Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
+
+### Anonymous to Registered Migration
+
+- Users can join rooms and send messages without creating an account
+- Anonymous users have a temporary identity that persists for their session
+- When an anonymous user registers, their identity and message history are preserved
+- Room memberships and all associated data transfer to the registered account
+
+**UI contract:**
+- Guest entry: `button` with text "Guest" or "Anonymous" or "Join as Guest", OR the app auto-assigns a name like "Guest-XXXXX" or "Anon-XXXXX"
+- Guest indicator: text "guest" or "anon" visible as a badge/label next to the anonymous user’s name
+- Register button: `button` with text "Register" or "Sign Up" visible to guest users
+- Registration form: `input` with `placeholder` containing "name" or "username" for choosing a display name
+- Migration: after registration, all previous messages show the new display name
+
+### Pinned Messages
+
+- Users can pin important messages in a channel (admins and message authors can pin)
+- Pinned messages show a pin indicator in the message list
+- A "Pinned Messages" panel accessible from the channel header shows all pins for that channel
+- Users can unpin messages
+- Pin/unpin actions sync to all users in the channel in real-time
+
+**UI contract:**
+- Pin button: `button` with text "Pin" or `aria-label` containing "pin" visible on message hover
+- Pinned indicator: text "pinned" or a pin icon (📌) visible on pinned messages
+- Pinned panel: `button` with text "Pinned" or "Pins" in the channel header, opening a panel listing all pinned messages
+- Unpin: `button` with text "Unpin" on pinned messages (in the panel or on hover)
+
+### User Profiles
+
+- Users can edit their profile: display name, bio/status message, and avatar URL
+- Clicking on a username anywhere in the app opens a profile card/popover showing their info
+- When a user updates their profile, the changes propagate everywhere in real-time — message attributions, member lists, online user lists, and DM headers all reflect the new name/avatar immediately
+- Profile changes are visible to all users across all channels without page refresh
+
+**UI contract:**
+- Profile edit: `button` with text "Edit Profile" or "Profile" or a settings/gear icon accessible from the sidebar
+- Bio input: `input` or `textarea` with `placeholder` containing "bio" or "status" (case-insensitive)
+- Profile card: clicking a username opens a popover/modal showing the user’s name, bio, and avatar
+- Name propagation: changing display name updates all message attributions in real-time
+
+### @Mentions and Notification Feed
+
+- Users can @mention other users in messages by typing `@username`
+- Mentioned usernames are highlighted/styled in the message text
+- When a user is mentioned, a notification is created for them
+- Notification bell icon in the sidebar/header shows unread notification count
+- Clicking the bell opens a notification panel listing all notifications (mentions, invites, etc.) with the source message and channel
+- Users can mark individual notifications as read, or mark all as read
+- Notifications update in real-time — new mentions appear instantly in the bell count
+- Clicking a notification navigates to the source message in its channel
+
+**UI contract:**
+- Mention highlighting: `@username` text in messages is visually distinct (bold, colored, or wrapped in a styled `span`)
+- Notification bell: `button` with text "🔔" or `aria-label` containing "notification" visible in the sidebar or header
+- Unread count: a numeric badge near the bell showing unread notification count
+- Notification panel: clicking the bell shows a list of notifications with message text and channel name
+- Mark read: `button` with text "Mark Read" or "Mark All Read" in the notification panel
+
+### Bookmarked/Saved Messages
+
+- Users can bookmark any message for personal reference (bookmark icon on hover)
+- A "Saved Messages" panel in the sidebar shows all bookmarked messages across all channels
+- Each bookmark shows the message content, sender, channel name, and timestamp
+- Users can remove bookmarks
+- Bookmarks are personal — only visible to the user who saved them
+- Bookmark list updates in real-time (e.g., if a bookmarked message is edited, the bookmark reflects the change)
+
+**UI contract:**
+- Bookmark button: `button` with text "Bookmark" or "Save" or `aria-label` containing "bookmark" or "save" visible on message hover
+- Saved panel: `button` with text "Saved" or "Bookmarks" in the sidebar, opening a panel
+- Bookmark entry: each saved message shows the message text and the channel/sender it came from
+- Remove: `button` with text "Remove" or "Unsave" next to bookmarked messages in the panel
+
+### Message Forwarding
+
+- Users can forward a message to another channel they're a member of
+- A "Forward" button appears on message hover, opening a channel picker
+- The forwarded message appears in the target channel with a "Forwarded from #original-channel by @user" attribution
+- The original message is not modified — forwarding creates a copy
+- Forwarded messages appear in real-time for all members of the target channel
+
+**UI contract:**
+- Forward button: `button` with text "Forward" or `aria-label` containing "forward" visible on message hover
+- Channel picker: a list or dropdown showing channel names the user can forward to
+- Attribution: forwarded messages display text containing "Forwarded" or "forwarded from"
+- Original unchanged: the source message has no "forwarded" indicator
+
+### Slow Mode
+
+- Admins can enable slow mode on a channel with a configurable cooldown (e.g., 10s, 30s, 1m, 5m)
+- When slow mode is active, users can only send one message per cooldown period
+- After a user sends a message, the UI shows a countdown timer and disables the input until the cooldown expires
+- A "Slow Mode" indicator is visible in the channel header when active
+- Admins are exempt from slow mode restrictions
+- Slow mode setting changes sync to all channel members in real-time
+
+**UI contract:**
+- Settings: `button` with text "Settings" or a gear icon in the room header (admin only)
+- Slow mode toggle: `input[type="checkbox"]` or `button` with text/label containing "Slow Mode"
+- Cooldown input: `input[type="number"]` or `select` for setting the cooldown duration in seconds
+- Indicator: text "Slow Mode" visible in the channel header when active
+- Enforcement: after sending, the message input is `disabled` or shows countdown text until cooldown expires
+- Admin exempt: admins can send messages without cooldown restriction
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/composed/19_polls.md b/tools/llm-oneshot/apps/chat-app/prompts/composed/19_polls.md
new file mode 100644
index 00000000000..81ca2212937
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/composed/19_polls.md
@@ -0,0 +1,345 @@
+# Chat App - Full Features (22)
+
+Create a **real-time chat app**.
+
+
+## UI & Style Guide
+
+### Layout
+- **Sidebar** (left, ~220px fixed): app title/branding, user info with status, room list, online users
+- **Main area** (right, flex): room header bar, scrollable message list, input bar pinned to bottom
+- **Panels** (right slide-in or overlay): threads, pinned messages, profiles, settings
+
+### Visual Design
+- Dark theme using the brand colors from the language section below
+- Background: darkest shade for main bg, slightly lighter for sidebar and cards
+- Text: light on dark, muted color for timestamps and secondary info
+- Borders: subtle 1px, low contrast against background
+- Consistent spacing scale (8/12/16/24px)
+- Font: system font stack, clear hierarchy (bold headers, regular body, small muted metadata)
+- Rounded corners on inputs, buttons, cards, and message containers
+
+### Components
+- **Messages**: sender name (colored) + timestamp (muted) + text. Group consecutive messages from same sender. Action buttons appear on hover only (which buttons depend on the features below).
+- **Inputs**: full-width, rounded, subtle border, placeholder text, focus ring using primary color
+- **Buttons**: filled with primary color for main actions, outlined/ghost for secondary. Clear hover and active states.
+- **Badges**: small pill-shaped with count, contrasting color (e.g., unread count on rooms)
+- **Modals/panels**: slide-in from right with subtle backdrop, or dropdown overlays
+- **Status indicators**: small colored dots (green=online, yellow=away, red=DND, grey=offline)
+- **Room list**: room names with optional icon prefix (#), active room highlighted, unread badge
+
+### Interaction & UX
+- Show loading/connecting state while backend connects (spinner or skeleton, not blank screen)
+- Empty states: helpful text when no rooms, no messages, no results ("Create a room to get started")
+- Error feedback: inline error messages or toast notifications, never silent failures
+- Smooth transitions: fade/slide for panels, modals, and state changes
+- Hover reveals: message action buttons, tooltips on reactions, user profile cards
+- Keyboard support: Enter to send messages, Escape to close modals/panels
+- Auto-scroll to newest message, with scroll-to-bottom button when scrolled up
+
+## Features
+
+**Important:** Each feature below includes a "UI contract" section specifying required element attributes for automated testing. You MUST follow these — they define the user-facing interface. Your architecture, state management, and backend design are entirely up to you.
+
+### Basic Chat Features
+
+- Users can set a display name
+- Users can create chat rooms and join/leave them
+- Users can send messages to rooms they've joined
+- Show who's online
+- Include reasonable validation (e.g., don't let users spam, enforce sensible limits)
+
+**UI contract:**
+- Name input: `placeholder` contains "name" (case-insensitive)
+- Name submit: `button` with text "Join", "Register", "Set Name", or `type="submit"`
+- Room creation: `button` with text containing "Create" or "New" or "+"
+- Room name input: `placeholder` contains "room" or "name" (case-insensitive)
+- Message input: `placeholder` contains "message" (case-insensitive)
+- Send message: pressing Enter in the message input sends the message
+- Room list: room names visible as clickable text in a sidebar or list
+- Join room: clicking room name joins/enters it, or a `button` with text "Join"
+- Leave room: `button` with text "Leave"
+- Online users: user names displayed as text in a visible user list or member panel
+
+### Typing Indicators
+
+- Show when other users are currently typing in the SAME room (typing must be scoped to room — do not broadcast typing to users in different rooms)
+- Typing indicator should automatically expire after a few seconds of inactivity
+- Display "User is typing..." or "Multiple users are typing..." in the UI
+
+**UI contract:**
+- Typing text: visible text containing "typing" (case-insensitive) when another user types
+- Auto-expiry: typing indicator text disappears within 6 seconds of inactivity
+
+### Read Receipts
+
+- Track which users have seen which messages
+- Display "Seen by X, Y, Z" under messages — only show OTHER users who have seen it, not the sender
+- Update read status in real-time as users view messages
+
+**UI contract:**
+- Receipt text: text containing "seen" or "read" (case-insensitive) appears near messages after another user views them
+- Reader names: the receipt text includes the viewing user’s display name
+
+### Unread Message Counts
+
+- Show unread message count badges on the room list
+- Track last-read position per user per room
+- Update counts in real-time as new messages arrive or are read
+
+**UI contract:**
+- Badge: a visible numeric badge (e.g., "3") appears next to room names in the sidebar when there are unread messages
+- Badge clears when the room is opened/entered
+
+### Scheduled Messages
+
+- Users can compose a message and schedule it to send at a future time
+- Show pending scheduled messages to the author (with option to cancel)
+- Message appears in the room at the scheduled time
+
+**UI contract:**
+- Schedule button: `button` with text "Schedule" or `aria-label` containing "schedule", or an icon button with `title` containing "schedule"
+- Time picker: an `input[type="datetime-local"]` or `input[type="time"]` or `input[type="number"]` for setting the send time
+- Pending list: text "Scheduled" or "Pending" visible when viewing scheduled messages
+- Cancel: `button` with text "Cancel" next to pending scheduled messages
+
+### Ephemeral/Disappearing Messages
+
+- Users can send messages that auto-delete after a set duration (e.g., 1 minute, 5 minutes)
+- Show a countdown or indicator that the message will disappear
+- Message is permanently deleted from the database when time expires
+
+**UI contract:**
+- Ephemeral toggle: `select`, `button`, or `input` with text/label containing "ephemeral", "disappear", or "expire" (case-insensitive)
+- Duration options: selectable durations (e.g., 30s, 1m, 5m)
+- Indicator: visible text containing a countdown, "expires", or "disappearing" on ephemeral messages
+- Deletion: the message text is removed from the DOM after the duration expires
+
+### Message Reactions
+
+- Users can react to messages with emoji (e.g., 👍 ❤️ 😂 😮 😢)
+- Show reaction counts on messages that update in real-time
+- Users can toggle their own reactions on/off
+- Display who reacted when hovering over reaction counts
+
+**UI contract:**
+- Reaction trigger: `button` with emoji text (👍 ❤️ 😂 😮 😢) or a `button` with text "React" / `aria-label` containing "react" visible on message hover
+- Reaction display: emoji + count (e.g., "👍 2") visible below or beside the reacted message
+- Toggle: clicking the same emoji again removes the user’s reaction
+- Hover info: `title` attribute on reaction element showing voter names
+
+### Message Editing with History
+
+- Users can edit their own messages after sending
+- Show "(edited)" indicator on edited messages
+- Other users can view the edit history of a message
+- Edits sync in real-time to all viewers
+
+**UI contract:**
+- Edit button: `button` with text "Edit" visible on hover over own messages
+- Edit form: an inline `input` or `textarea` replaces the message content during editing, with a "Save" `button`
+- Edited indicator: text "(edited)" visible on edited messages
+- History: clicking "(edited)" opens a view showing previous versions of the message
+
+### Real-Time Permissions
+
+- Room creators are admins and can kick/ban users from their rooms
+- Kicked users immediately lose access and stop receiving room updates
+- Admins can promote other users to admin
+- Permission changes apply instantly without requiring reconnection
+
+**UI contract:**
+- Admin indicator: text "Admin" or "ADMIN" visible for admin users in the member list
+- Members panel: `button` with text "Members" or "Manage" in the room header
+- Kick button: `button` with text "Kick" next to non-admin members
+- Promote button: `button` with text "Promote" next to non-admin members
+- Kicked feedback: kicked user sees text containing "kicked" or is redirected away from the room
+
+### Rich User Presence
+
+- Users can set their status: online, away, do-not-disturb, invisible
+- Show "Last active X minutes ago" for users who aren't online
+- Status changes sync to all viewers in real-time
+- Auto-set to "away" after a period of inactivity
+
+**UI contract:**
+- Status selector: `select` or group of `button` elements with text "Online", "Away", "Do Not Disturb" / "DND", "Invisible"
+- Status indicator: colored dot or icon next to user names (green=online, yellow=away, red=DND, grey=invisible/offline)
+- Last active: text containing "Last active" or "ago" for offline/away users
+
+### Message Threading
+
+- Users can reply to specific messages, creating a thread
+- Show reply count and preview on parent messages
+- Threaded view to see all replies to a message
+- New replies sync in real-time to thread viewers
+
+**UI contract:**
+- Reply button: `button` with text "Reply" or "💬" visible on message hover
+- Reply count: text like "N replies" or "💬 N" visible on messages that have replies
+- Thread panel: clicking the reply button/count opens a panel showing the parent message and all replies
+- Thread input: `input` or `textarea` with `placeholder` containing "reply" (case-insensitive) in the thread panel
+
+### Private Rooms and Direct Messages
+
+- Users can create private/invite-only rooms that don't appear in the public room list
+- Room creators can invite specific users by username
+- Direct messages (DMs) between two users as a special type of private room
+- Invited users receive notifications and can accept/decline invitations
+- Only members can see private room content and member lists
+
+**UI contract:**
+- Private toggle: `input[type="checkbox"]` or `button` with text/label containing "Private" during room creation
+- Private indicator: text "private" or a lock icon (🔒) visible on private rooms in the sidebar
+- Invite button: `button` with text "Invite" in the room header or members panel
+- Invitation UI: invited user sees text containing the room name with "Accept" and "Decline" `button` elements
+- DM button: `button` with text "DM" or "💬" next to user names in the user list
+
+### Room Activity Indicators
+
+- Show activity badges on rooms with recent message activity (e.g., "Active now", "Hot")
+- Display real-time message velocity or activity level per room
+- Activity indicators update live as conversation pace changes
+- Help users quickly identify where active conversations are happening
+
+**UI contract:**
+- Active badge: text "Active" or "ACTIVE" (green) visible on rooms with 1+ messages in the last 5 minutes
+- Hot badge: text "Hot" or "🔥" (orange) visible on rooms with 5+ messages in the last 2 minutes
+- Badges appear in the room list/sidebar next to room names
+
+### Draft Sync
+
+- Message drafts are saved and synced across a user's devices in real-time
+- Users can resume typing where they left off on any device
+- Each room maintains its own draft per user
+- Drafts persist across sessions until sent or cleared
+
+**UI contract:**
+- Auto-save: typing in the message input saves the draft automatically (no save button needed)
+- Persistence: switching rooms and switching back restores the draft text in the message input
+- Cross-session: refreshing the page restores the draft text
+- Clear on send: sending a message clears the draft for that room
+
+### Anonymous to Registered Migration
+
+- Users can join rooms and send messages without creating an account
+- Anonymous users have a temporary identity that persists for their session
+- When an anonymous user registers, their identity and message history are preserved
+- Room memberships and all associated data transfer to the registered account
+
+**UI contract:**
+- Guest entry: `button` with text "Guest" or "Anonymous" or "Join as Guest", OR the app auto-assigns a name like "Guest-XXXXX" or "Anon-XXXXX"
+- Guest indicator: text "guest" or "anon" visible as a badge/label next to the anonymous user’s name
+- Register button: `button` with text "Register" or "Sign Up" visible to guest users
+- Registration form: `input` with `placeholder` containing "name" or "username" for choosing a display name
+- Migration: after registration, all previous messages show the new display name
+
+### Pinned Messages
+
+- Users can pin important messages in a channel (admins and message authors can pin)
+- Pinned messages show a pin indicator in the message list
+- A "Pinned Messages" panel accessible from the channel header shows all pins for that channel
+- Users can unpin messages
+- Pin/unpin actions sync to all users in the channel in real-time
+
+**UI contract:**
+- Pin button: `button` with text "Pin" or `aria-label` containing "pin" visible on message hover
+- Pinned indicator: text "pinned" or a pin icon (📌) visible on pinned messages
+- Pinned panel: `button` with text "Pinned" or "Pins" in the channel header, opening a panel listing all pinned messages
+- Unpin: `button` with text "Unpin" on pinned messages (in the panel or on hover)
+
+### User Profiles
+
+- Users can edit their profile: display name, bio/status message, and avatar URL
+- Clicking on a username anywhere in the app opens a profile card/popover showing their info
+- When a user updates their profile, the changes propagate everywhere in real-time — message attributions, member lists, online user lists, and DM headers all reflect the new name/avatar immediately
+- Profile changes are visible to all users across all channels without page refresh
+
+**UI contract:**
+- Profile edit: `button` with text "Edit Profile" or "Profile" or a settings/gear icon accessible from the sidebar
+- Bio input: `input` or `textarea` with `placeholder` containing "bio" or "status" (case-insensitive)
+- Profile card: clicking a username opens a popover/modal showing the user’s name, bio, and avatar
+- Name propagation: changing display name updates all message attributions in real-time
+
+### @Mentions and Notification Feed
+
+- Users can @mention other users in messages by typing `@username`
+- Mentioned usernames are highlighted/styled in the message text
+- When a user is mentioned, a notification is created for them
+- Notification bell icon in the sidebar/header shows unread notification count
+- Clicking the bell opens a notification panel listing all notifications (mentions, invites, etc.) with the source message and channel
+- Users can mark individual notifications as read, or mark all as read
+- Notifications update in real-time — new mentions appear instantly in the bell count
+- Clicking a notification navigates to the source message in its channel
+
+**UI contract:**
+- Mention highlighting: `@username` text in messages is visually distinct (bold, colored, or wrapped in a styled `span`)
+- Notification bell: `button` with text "🔔" or `aria-label` containing "notification" visible in the sidebar or header
+- Unread count: a numeric badge near the bell showing unread notification count
+- Notification panel: clicking the bell shows a list of notifications with message text and channel name
+- Mark read: `button` with text "Mark Read" or "Mark All Read" in the notification panel
+
+### Bookmarked/Saved Messages
+
+- Users can bookmark any message for personal reference (bookmark icon on hover)
+- A "Saved Messages" panel in the sidebar shows all bookmarked messages across all channels
+- Each bookmark shows the message content, sender, channel name, and timestamp
+- Users can remove bookmarks
+- Bookmarks are personal — only visible to the user who saved them
+- Bookmark list updates in real-time (e.g., if a bookmarked message is edited, the bookmark reflects the change)
+
+**UI contract:**
+- Bookmark button: `button` with text "Bookmark" or "Save" or `aria-label` containing "bookmark" or "save" visible on message hover
+- Saved panel: `button` with text "Saved" or "Bookmarks" in the sidebar, opening a panel
+- Bookmark entry: each saved message shows the message text and the channel/sender it came from
+- Remove: `button` with text "Remove" or "Unsave" next to bookmarked messages in the panel
+
+### Message Forwarding
+
+- Users can forward a message to another channel they're a member of
+- A "Forward" button appears on message hover, opening a channel picker
+- The forwarded message appears in the target channel with a "Forwarded from #original-channel by @user" attribution
+- The original message is not modified — forwarding creates a copy
+- Forwarded messages appear in real-time for all members of the target channel
+
+**UI contract:**
+- Forward button: `button` with text "Forward" or `aria-label` containing "forward" visible on message hover
+- Channel picker: a list or dropdown showing channel names the user can forward to
+- Attribution: forwarded messages display text containing "Forwarded" or "forwarded from"
+- Original unchanged: the source message has no "forwarded" indicator
+
+### Slow Mode
+
+- Admins can enable slow mode on a channel with a configurable cooldown (e.g., 10s, 30s, 1m, 5m)
+- When slow mode is active, users can only send one message per cooldown period
+- The UI shows a countdown timer after sending a message, disabling the input until the cooldown expires
+- A "Slow Mode" indicator is visible in the channel header when active
+- Admins are exempt from slow mode restrictions
+- Slow mode setting changes sync to all channel members in real-time
+
+**UI contract:**
+- Settings: `button` with text "Settings" or a gear icon in the room header (admin only)
+- Slow mode toggle: `input[type="checkbox"]` or `button` with text/label containing "Slow Mode"
+- Cooldown input: `input[type="number"]` or `select` for setting the cooldown duration in seconds
+- Indicator: text "Slow Mode" visible in the channel header when active
+- Enforcement: after sending, the message input is `disabled` or shows countdown text until cooldown expires
+- Admin exempt: admins can send messages without cooldown restriction
+
+### Polls
+
+- Users can create a poll in a channel with a question and 2-6 options
+- Each user can vote for one option (single-choice) — no double voting
+- Vote counts update in real-time for all users in the channel as votes come in
+- Users can change their vote (previous vote is removed, new vote is added atomically)
+- The poll creator can close the poll, preventing further votes
+- Show who voted for each option (voter names visible on hover or in a detail view)
+
+**UI contract:**
+- Create poll: `button` with text "Poll" or "Create Poll" accessible from the message area
+- Question input: `input` or `textarea` with `placeholder` containing "question" (case-insensitive)
+- Option inputs: multiple `input` elements with `placeholder` containing "option" or "choice" (case-insensitive)
+- Vote: clicking an option `button` or `label` casts a vote
+- Vote count: each option shows a numeric vote count that updates in real-time
+- Close poll: `button` with text "Close" or "End Poll" visible to the poll creator
+- Closed state: text "Closed" or "Ended" visible on closed polls
+- Voter names: `title` attribute or expandable section showing who voted for each option
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/05_scheduled_messages.md b/tools/llm-oneshot/apps/chat-app/prompts/features/02_scheduled.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/05_scheduled_messages.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/02_scheduled.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/02_typing_indicators.md b/tools/llm-oneshot/apps/chat-app/prompts/features/02_typing_indicators.md
deleted file mode 100644
index c22b413af9f..00000000000
--- a/tools/llm-oneshot/apps/chat-app/prompts/features/02_typing_indicators.md
+++ /dev/null
@@ -1,5 +0,0 @@
-### Typing Indicators
-
-- Show when other users are currently typing in a room
-- Typing indicator should automatically expire after a few seconds of inactivity
-- Display "User is typing..." or "Multiple users are typing..." in the UI
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/06_ephemeral_messages.md b/tools/llm-oneshot/apps/chat-app/prompts/features/03_ephemeral.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/06_ephemeral_messages.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/03_ephemeral.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/03_read_receipts.md b/tools/llm-oneshot/apps/chat-app/prompts/features/03_read_receipts.md
deleted file mode 100644
index 0793b37127f..00000000000
--- a/tools/llm-oneshot/apps/chat-app/prompts/features/03_read_receipts.md
+++ /dev/null
@@ -1,5 +0,0 @@
-### Read Receipts
-
-- Track which users have seen which messages
-- Display "Seen by X, Y, Z" under messages (or a seen indicator)
-- Update read status in real-time as users view messages
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/07_reactions.md b/tools/llm-oneshot/apps/chat-app/prompts/features/04_reactions.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/07_reactions.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/04_reactions.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/04_unread_counts.md b/tools/llm-oneshot/apps/chat-app/prompts/features/04_unread_counts.md
deleted file mode 100644
index b115043acba..00000000000
--- a/tools/llm-oneshot/apps/chat-app/prompts/features/04_unread_counts.md
+++ /dev/null
@@ -1,5 +0,0 @@
-### Unread Message Counts
-
-- Show unread message count badges on the room list
-- Track last-read position per user per room
-- Update counts in real-time as new messages arrive or are read
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/08_edit_history.md b/tools/llm-oneshot/apps/chat-app/prompts/features/05_edit_history.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/08_edit_history.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/05_edit_history.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/09_realtime_permissions.md b/tools/llm-oneshot/apps/chat-app/prompts/features/06_permissions.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/09_realtime_permissions.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/06_permissions.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/10_rich_presence.md b/tools/llm-oneshot/apps/chat-app/prompts/features/07_presence.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/10_rich_presence.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/07_presence.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/11_threading.md b/tools/llm-oneshot/apps/chat-app/prompts/features/08_threading.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/11_threading.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/08_threading.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/12_private_rooms.md b/tools/llm-oneshot/apps/chat-app/prompts/features/09_private_rooms.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/12_private_rooms.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/09_private_rooms.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/13_activity_indicators.md b/tools/llm-oneshot/apps/chat-app/prompts/features/10_activity.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/13_activity_indicators.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/10_activity.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/14_draft_sync.md b/tools/llm-oneshot/apps/chat-app/prompts/features/11_drafts.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/14_draft_sync.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/11_drafts.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/15_anonymous_migration.md b/tools/llm-oneshot/apps/chat-app/prompts/features/12_anon_migration.md
similarity index 100%
rename from tools/llm-oneshot/apps/chat-app/prompts/features/15_anonymous_migration.md
rename to tools/llm-oneshot/apps/chat-app/prompts/features/12_anon_migration.md
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/13_pinned.md b/tools/llm-oneshot/apps/chat-app/prompts/features/13_pinned.md
new file mode 100644
index 00000000000..52e86d44d61
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/features/13_pinned.md
@@ -0,0 +1,7 @@
+### Pinned Messages
+
+- Users can pin important messages in a channel (admins and message authors can pin)
+- Pinned messages show a pin indicator in the message list
+- A "Pinned Messages" panel accessible from the channel header shows all pins for that channel
+- Users can unpin messages
+- Pin/unpin actions sync to all users in the channel in real-time
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/14_profiles.md b/tools/llm-oneshot/apps/chat-app/prompts/features/14_profiles.md
new file mode 100644
index 00000000000..8a5a7a2f39e
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/features/14_profiles.md
@@ -0,0 +1,6 @@
+### User Profiles
+
+- Users can edit their profile: display name, bio/status message, and avatar URL
+- Clicking on a username anywhere in the app opens a profile card/popover showing their info
+- When a user updates their profile, the changes propagate everywhere in real-time
+- Profile changes are visible to all users across all channels without page refresh
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/15_mentions.md b/tools/llm-oneshot/apps/chat-app/prompts/features/15_mentions.md
new file mode 100644
index 00000000000..6f8a9c4e70b
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/features/15_mentions.md
@@ -0,0 +1,9 @@
+### @Mentions and Notification Feed
+
+- Users can @mention other users in messages by typing @username
+- Mentioned usernames are highlighted/styled in the message text
+- When a user is mentioned, a notification is created for them
+- Notification bell icon in the sidebar/header shows unread notification count
+- Clicking the bell opens a notification panel listing all notifications with the source message and channel
+- Users can mark individual notifications as read, or mark all as read
+- Notifications update in real-time
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/16_bookmarks.md b/tools/llm-oneshot/apps/chat-app/prompts/features/16_bookmarks.md
new file mode 100644
index 00000000000..426729c02eb
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/features/16_bookmarks.md
@@ -0,0 +1,8 @@
+### Bookmarked/Saved Messages
+
+- Users can bookmark any message for personal reference (bookmark icon on hover)
+- A "Saved Messages" panel in the sidebar shows all bookmarked messages across all channels
+- Each bookmark shows the message content, sender, channel name, and timestamp
+- Users can remove bookmarks
+- Bookmarks are personal — only visible to the user who saved them
+- Bookmark list updates in real-time
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/17_forwarding.md b/tools/llm-oneshot/apps/chat-app/prompts/features/17_forwarding.md
new file mode 100644
index 00000000000..739775374c5
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/features/17_forwarding.md
@@ -0,0 +1,7 @@
+### Message Forwarding
+
+- Users can forward a message to another channel they're a member of
+- A "Forward" button appears on message hover, opening a channel picker
+- The forwarded message appears in the target channel with a "Forwarded from #original-channel by @user" attribution
+- The original message is not modified — forwarding creates a copy
+- Forwarded messages appear in real-time for all members of the target channel
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/18_slowmode.md b/tools/llm-oneshot/apps/chat-app/prompts/features/18_slowmode.md
new file mode 100644
index 00000000000..3f40218a2fc
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/features/18_slowmode.md
@@ -0,0 +1,8 @@
+### Slow Mode
+
+- Admins can enable slow mode on a channel with a configurable cooldown (e.g., 10s, 30s, 1m, 5m)
+- When slow mode is active, users can only send one message per cooldown period
+- The UI shows a countdown timer after sending a message, disabling the input until the cooldown expires
+- A "Slow Mode" indicator is visible in the channel header when active
+- Admins are exempt from slow mode restrictions
+- Slow mode setting changes sync to all channel members in real-time
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/features/19_polls.md b/tools/llm-oneshot/apps/chat-app/prompts/features/19_polls.md
new file mode 100644
index 00000000000..24a261c8f0b
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/features/19_polls.md
@@ -0,0 +1,8 @@
+### Polls
+
+- Users can create a poll in a channel with a question and 2-6 options
+- Each user can vote for one option (single-choice)
+- Vote counts update in real-time for all users in the channel as votes come in
+- Users can change their vote
+- The poll creator can close the poll, preventing further votes
+- Show who voted for each option (voter names visible on hover or in a detail view)
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-postgres.md b/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-postgres.md
index 70f759aee9d..3839ac23918 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-postgres.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-postgres.md
@@ -22,6 +22,23 @@ Database name: `chat-app`
   - `.../client/` (client-side TypeScript/React)
 - Keep it minimal and readable.
 
+## Branding & Styling
+
+- App title: **"PostgreSQL Chat"**
+- Dark theme using official PostgreSQL brand colors:
+  - Primary: `#336791` (PostgreSQL blue)
+  - Primary hover: `#008bb9` (lighter PostgreSQL blue)
+  - Secondary: `#0064a5` (dark PostgreSQL blue)
+  - Background: `#1a1a2e` (dark navy)
+  - Surface: `#16213e` (slightly lighter)
+  - Border: `#2a2a4a` (muted border)
+  - Text: `#e8e8e8` (light gray)
+  - Text muted: `#848484` (PostgreSQL light grey)
+  - Accent: `#008bb9` (PostgreSQL light blue)
+  - Success: `#27ae60` (green for online indicators)
+  - Warning: `#f26522` (PostgreSQL light orange)
+  - Danger: `#cc3b03` (PostgreSQL dark orange/red)
+
 ## Output
 
 Return only code blocks with file headers for the files you create.
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-spacetime.md b/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-spacetime.md
index 97c2d47218a..912540a8a21 100644
--- a/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-spacetime.md
+++ b/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-spacetime.md
@@ -22,6 +22,24 @@ Module name: `chat-app`
   - `.../client/src/` (client-side TypeScript/React)
 - Keep it minimal and readable.
 
+## Branding & Styling
+
+- App title: **"SpacetimeDB Chat"**
+- Dark theme using official SpacetimeDB brand colors:
+  - Primary: `#4cf490` (SpacetimeDB green)
+  - Primary hover: `#4cf490bf` (green 75% opacity)
+  - Secondary: `#a880ff` (SpacetimeDB purple)
+  - Background: `#0d0d0e` (shade2 — near black)
+  - Surface: `#141416` (shade1 — slightly lighter)
+  - Border: `#202126` (n6)
+  - Text: `#e6e9f0` (n1 — light gray)
+  - Text muted: `#6f7987` (n4)
+  - Accent: `#02befa` (SpacetimeDB blue)
+  - Success: `#4cf490` (green — same as primary)
+  - Warning: `#fbdc8e` (SpacetimeDB yellow)
+  - Danger: `#ff4c4c` (SpacetimeDB red)
+  - Gradient (optional, for headers): `linear-gradient(266deg, #4cf490 0%, #8a38f5 100%)` (green to purple)
+
 ## Output
 
 Return only code blocks with file headers for the files you create.
diff --git a/tools/llm-oneshot/package-lock.json b/tools/llm-oneshot/package-lock.json
new file mode 100644
index 00000000000..4830bd1882d
--- /dev/null
+++ b/tools/llm-oneshot/package-lock.json
@@ -0,0 +1,133 @@
+{
+  "name": "llm-oneshot",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "llm-oneshot",
+      "devDependencies": {
+        "@types/node": "^20.0.0",
+        "tsx": "^4.7.0"
+      }
+    },
+    "node_modules/@esbuild/win32-x64": {
+      "version": "0.27.4",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.4.tgz",
+      "integrity": "sha512-+knoa0BDoeXgkNvvV1vvbZX4+hizelrkwmGJBdT17t8FNPwG2lKemmuMZlmaNQ3ws3DKKCxpb4zRZEIp3UxFCg==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@types/node": {
+      "version": "20.19.37",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.37.tgz",
+      "integrity": "sha512-8kzdPJ3FsNsVIurqBs7oodNnCEVbni9yUEkaHbgptDACOPW04jimGagZ51E6+lXUwJjgnBw+hyko/lkFWCldqw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "undici-types": "~6.21.0"
+      }
+    },
+    "node_modules/esbuild": {
+      "version": "0.27.4",
+      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.4.tgz",
+      "integrity": "sha512-Rq4vbHnYkK5fws5NF7MYTU68FPRE1ajX7heQ/8QXXWqNgqqJ/GkmmyxIzUnf2Sr/bakf8l54716CcMGHYhMrrQ==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "bin": {
+        "esbuild": "bin/esbuild"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "@esbuild/aix-ppc64": "0.27.4",
+        "@esbuild/android-arm": "0.27.4",
+        "@esbuild/android-arm64": "0.27.4",
+        "@esbuild/android-x64": "0.27.4",
+        "@esbuild/darwin-arm64": "0.27.4",
+        "@esbuild/darwin-x64": "0.27.4",
+        "@esbuild/freebsd-arm64": "0.27.4",
+        "@esbuild/freebsd-x64": "0.27.4",
+        "@esbuild/linux-arm": "0.27.4",
+        "@esbuild/linux-arm64": "0.27.4",
+        "@esbuild/linux-ia32": "0.27.4",
+        "@esbuild/linux-loong64": "0.27.4",
+        "@esbuild/linux-mips64el": "0.27.4",
+        "@esbuild/linux-ppc64": "0.27.4",
+        "@esbuild/linux-riscv64": "0.27.4",
+        "@esbuild/linux-s390x": "0.27.4",
+        "@esbuild/linux-x64": "0.27.4",
+        "@esbuild/netbsd-arm64": "0.27.4",
+        "@esbuild/netbsd-x64": "0.27.4",
+        "@esbuild/openbsd-arm64": "0.27.4",
+        "@esbuild/openbsd-x64": "0.27.4",
+        "@esbuild/openharmony-arm64": "0.27.4",
+        "@esbuild/sunos-x64": "0.27.4",
+        "@esbuild/win32-arm64": "0.27.4",
+        "@esbuild/win32-ia32": "0.27.4",
+        "@esbuild/win32-x64": "0.27.4"
+      }
+    },
+    "node_modules/get-tsconfig": {
+      "version": "4.13.7",
+      "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.7.tgz",
+      "integrity": "sha512-7tN6rFgBlMgpBML5j8typ92BKFi2sFQvIdpAqLA2beia5avZDrMs0FLZiM5etShWq5irVyGcGMEA1jcDaK7A/Q==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "resolve-pkg-maps": "^1.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
+      }
+    },
+    "node_modules/resolve-pkg-maps": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
+      "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
+      "dev": true,
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
+      }
+    },
+    "node_modules/tsx": {
+      "version": "4.21.0",
+      "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
+      "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "esbuild": "~0.27.0",
+        "get-tsconfig": "^4.7.5"
+      },
+      "bin": {
+        "tsx": "dist/cli.mjs"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      },
+      "optionalDependencies": {
+        "fsevents": "~2.3.3"
+      }
+    },
+    "node_modules/undici-types": {
+      "version": "6.21.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
+      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
+      "dev": true,
+      "license": "MIT"
+    }
+  }
+}
diff --git a/tools/llm-sequential-upgrade/.gitignore b/tools/llm-sequential-upgrade/.gitignore
new file mode 100644
index 00000000000..84ae31595e1
--- /dev/null
+++ b/tools/llm-sequential-upgrade/.gitignore
@@ -0,0 +1,23 @@
+# Node modules and build artifacts inside generated apps
+**/results/**/node_modules/
+**/results/**/dist/
+**/results/**/.vite/
+**/results/**/drizzle/
+
+# Telemetry backup files
+**/telemetry/*.jsonl.bak
+
+
+# Playwright
+**/playwright/node_modules/
+**/playwright/test-results/
+**/playwright/playwright-report/
+
+# Isolation git repos inside generated apps (created by run.sh, cleaned up after)
+**/results/**/.git/
+# OTel collector live dump - not tracked
+telemetry/logs.jsonl
+telemetry/metrics.jsonl
+
+# Raw telemetry contains PII (email, account IDs) - store privately
+**/telemetry/**/raw-telemetry.jsonl
diff --git a/tools/llm-sequential-upgrade/CLAUDE.md b/tools/llm-sequential-upgrade/CLAUDE.md
new file mode 100644
index 00000000000..69e38a4c42c
--- /dev/null
+++ b/tools/llm-sequential-upgrade/CLAUDE.md
@@ -0,0 +1,90 @@
+# Sequential Upgrade: LLM Cost-to-Done Benchmark
+
+You are running an automated benchmark that measures the **total cost to build a fully working chat app** — comparing SpacetimeDB vs PostgreSQL.
+
+Your job is to **generate, build, deploy, and fix** the app. Grading happens in a separate manual session — you do NOT test in the browser.
+
+---
+
+## Path Convention
+
+All file paths are **relative to the `llm-sequential-upgrade/` directory** unless stated otherwise. `../` means going up to `tools/`.
+
+Examples:
+- `backends/spacetime.md` → `llm-sequential-upgrade/backends/spacetime.md`
+- `../llm-oneshot/apps/chat-app/prompts/composed/01_basic.md` → `tools/llm-oneshot/apps/chat-app/prompts/composed/01_basic.md`
+
+---
+
+## What You Do
+
+Depending on the mode passed in the launch prompt:
+
+| Mode | Task |
+|------|------|
+| **generate** | Create the app from scratch for the given level |
+| **upgrade** | Add new features from the next level prompt to existing code |
+| **fix** | Read BUG_REPORT.md, fix the listed bugs, redeploy |
+
+**CRITICAL:** Read `backends/<backend>.md` first — it has all setup, build, and deploy instructions.
+
+---
+
+## Anti-Contamination
+
+Do NOT read any files under:
+- `../llm-oneshot/apps/chat-app/typescript/` (graded reference implementations)
+- `../llm-oneshot/apps/chat-app/staging/`
+- Any other AI-generated app code in this workspace
+
+Only read files you created, the backend instructions, and the feature prompts.
+
+---
+
+## Generate / Upgrade
+
+1. Read `backends/<backend>.md` for pre-flight checks, phases, and deploy steps
+2. Read the language setup: `../llm-oneshot/apps/chat-app/prompts/language/typescript-<backend>.md`
+3. Read the feature prompt: `../llm-oneshot/apps/chat-app/prompts/composed/<NN>_<name>.md`
+4. Follow the phases in the backend file (generate backend → bindings → client → verify → deploy)
+5. Output `DEPLOY_COMPLETE` when the dev server is confirmed running
+
+For **upgrade**: only add the NEW features from the target level. Do not rewrite existing working features.
+
+---
+
+## Fix
+
+1. Read `CLAUDE.md` in the app directory for architecture and deploy instructions
+2. Read `BUG_REPORT.md` — it describes exactly what's broken
+3. Read the relevant source files
+4. Fix each bug, redeploy, verify the server is running
+5. Append to `ITERATION_LOG.md` (see format below)
+6. Output `FIX_COMPLETE`
+
+Do NOT do browser testing — that happens in the grading session.
+
+---
+
+## ITERATION_LOG.md
+
+Append to this file after every fix. Never overwrite.
+
+```markdown
+## Iteration N — Fix (HH:MM)
+
+**Category:** Feature Broken | Compilation/Build | Runtime/Crash | Integration | Data/State
+**What broke:** <short description>
+**Root cause:** <what was actually wrong>
+**What I fixed:** <what changed>
+**Files changed:** <file (lines)>
+**Redeploy:** Client only | Server only | Both
+
+**Server verified:** Client at http://localhost:<port> ✓
+```
+
+---
+
+## Cost Tracking
+
+Cost is tracked automatically via OpenTelemetry — do NOT estimate tokens or write a COST_REPORT.md yourself. The cost report is generated automatically after the session ends.
diff --git a/tools/llm-sequential-upgrade/DEVELOP.md b/tools/llm-sequential-upgrade/DEVELOP.md
new file mode 100644
index 00000000000..f77bc520dc7
--- /dev/null
+++ b/tools/llm-sequential-upgrade/DEVELOP.md
@@ -0,0 +1,312 @@
+# Sequential Upgrade — Developer Guide
+
+How to set up, run, and interpret the LLM cost-to-done benchmark.
+
+---
+
+## What This Does
+
+Measures the **total token cost to reach a fully working chat app** by alternating between two agents:
+
+1. **Code Agent** (headless, `run.sh`) — generates code, fixes bugs, deploys. Token-tracked via OpenTelemetry.
+2. **Grade Agent** (interactive Claude Code) — tests in Chrome via MCP, writes bug reports. NOT token-tracked.
+
+Only the Code Agent's tokens count toward the benchmark. Grading cost is the same for both SpacetimeDB and PostgreSQL, so it's excluded.
+
+### The Loop
+
+```
+run.sh --level 1          → Code Agent generates & deploys app (tokens tracked)
+  ↓
+You (in Claude Code)      → Grade Agent tests in Chrome, writes BUG_REPORT.md
+  ↓
+run.sh --fix <app-dir>    → Code Agent reads bugs, fixes code, redeploys (tokens tracked)
+  ↓
+You (in Claude Code)      → Grade Agent retests, writes updated BUG_REPORT.md or GRADING_RESULTS.md
+  ↓
+... repeat until all features pass or iteration limit hit
+```
+
+---
+
+## Prerequisites
+
+### 1. SpacetimeDB
+
+```bash
+spacetime start
+```
+
+### 2. Docker (for OpenTelemetry Collector)
+
+```bash
+cd tools/llm-sequential-upgrade
+docker compose -f docker-compose.otel.yaml up -d
+```
+
+### 3. Claude Code CLI
+
+Requires `claude` on PATH; `npx @anthropic-ai/claude-code` works as a fallback.
+
+### 4. Chrome + Claude MCP Extension
+
+Required for the grading agent (interactive session). Chrome must be open with the "Claude in Chrome" MCP extension active.
+
+### 5. Node.js
+
+Required for SpacetimeDB TypeScript backend, Vite dev server, and `parse-telemetry.mjs`.
+
+---
+
+## Running a Benchmark
+
+### Step 1: Generate & Deploy (headless, token-tracked)
+
+```bash
+cd tools/llm-sequential-upgrade
+./run.sh --level 1 --backend spacetime
+```
+
+This:
+1. Runs pre-flight checks (SpacetimeDB, Docker, OTel, prompts)
+2. Launches headless Claude Code with OTel telemetry enabled
+3. Generates backend + client code, builds, deploys (SpacetimeDB: localhost:6173, PostgreSQL: localhost:6273)
+4. Parses telemetry → `COST_REPORT.md`
+5. Prints the app directory path
+
+### Step 2: Grade (interactive, not token-tracked)
+
+In this Claude Code session (or a new interactive one), say:
+
+```
+Grade the app at sequential-upgrade/sequential-upgrade-YYYYMMDD/spacetime/results/chat-app-<timestamp>
+```
+
+Or use the helper script:
+```bash
+./grade.sh sequential-upgrade/sequential-upgrade-YYYYMMDD/spacetime/results/chat-app-<timestamp>
+```
+
+The grading agent will:
+1. Open Chrome, navigate to the backend's port (6173 for SpacetimeDB, 6273 for PostgreSQL)
+2. Test each feature using the test plans
+3. Score features 0-3
+4. If bugs found: write `BUG_REPORT.md` in the app directory
+5. Write/update `ITERATION_LOG.md` and `GRADING_RESULTS.md`
+
+### Step 3: Fix (headless, token-tracked)
+
+If bugs were found:
+
+```bash
+./run.sh --fix sequential-upgrade/sequential-upgrade-YYYYMMDD/spacetime/results/chat-app-<timestamp>
+```
+
+This:
+1. Reads `BUG_REPORT.md` from the app directory
+2. Fixes the code, republishes if needed
+3. Tokens tracked via OTel (cumulative with Step 1)
+
+### Step 4: Re-grade
+
+Back in Claude Code:
+```
+Re-grade the app at sequential-upgrade/sequential-upgrade-YYYYMMDD/spacetime/results/chat-app-<timestamp>
+```
+
+Repeat Steps 3-4 until all features pass.
+
+### Options
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--level` | `1` | Prompt level (1-12). Level 1 = 4 features, Level 12 = all 15 |
+| `--backend` | `spacetime` | `spacetime` or `postgres` |
+| `--variant` | `sequential-upgrade` | Test variant: `sequential-upgrade` or `one-shot` |
+| `--fix <dir>` | — | Fix mode: read BUG_REPORT.md, fix code, redeploy |
+| `--upgrade <dir>` | — | Upgrade mode: add features to existing app |
+| `--resume-session` | — | Resume prior Claude session for cache reuse |
+
+### Recommended Test Levels
+
+| Level | Features | Est. Duration | Good For |
+|-------|----------|---------------|----------|
+| 1 | 4 (basic chat, typing, receipts, unread) | 5-15 min | Pipeline validation |
+| 5 | 8 (+ scheduled, ephemeral, reactions, edit) | 15-30 min | Mid-complexity |
+| 12 | All 15 features | 30-60+ min | Full benchmark |
+
+---
+
+## Output Files
+
+### Per-run directory structure
+```
+llm-sequential-upgrade/<variant>/<variant>-YYYYMMDD/
+  METRICS_DATA.json       # Comparison metrics (generated after all grading)
+  METRICS_REPORT.md       # Human-readable benchmark report
+  <backend>/              # e.g. spacetime/ or postgres/
+    inputs/               # Frozen snapshot of all inputs used for this run
+    results/
+      chat-app-<timestamp>/
+        GRADING_RESULTS.md  # Per-feature scores (written by grade agent)
+        ITERATION_LOG.md    # Per-iteration progress log (both agents append)
+        BUG_REPORT.md       # Current bugs for fix agent to read (deleted when all pass)
+        backend/            # Generated SpacetimeDB backend (spacetime only)
+        server/             # Generated Express server (postgres only)
+        client/             # Generated React client
+    telemetry/
+      <backend>-level<N>-<timestamp>/
+        metadata.json       # Run parameters, timing, session ID
+        cost-summary.json   # Parsed token counts and total cost
+        COST_REPORT.md      # Per-call breakdown
+        raw-telemetry.jsonl # OTel records for this session
+```
+
+### Shared telemetry (OTel Collector output)
+```
+llm-sequential-upgrade/telemetry/
+  logs.jsonl              # Raw OTLP log records (shared across all runs)
+  metrics.jsonl           # Raw OTLP metrics
+```
+
+---
+
+## Understanding the Results
+
+### GRADING_RESULTS.md
+
+- **Feature scores**: 0-3 per feature, scored from observed browser behavior
+- **Reprompt log**: Every bug fix iteration with category and description
+- **Reprompt efficiency**: 0-10 scale (0 reprompts = 10, 16+ reprompts = 0)
+
+### COST_REPORT.md
+
+- **Total tokens**: Exact input + output token counts across all Code Agent API calls
+- **Cache read tokens**: Tokens served from prompt cache (reduced cost)
+- **Cost (USD)**: Total dollar cost of the code generation + fix iterations
+- **Per-call breakdown**: Every API call with model, tokens, cost, duration
+
+### Key Comparison Metrics
+
+| Metric | What It Shows |
+|--------|---------------|
+| Total tokens to done | Raw LLM efficiency — fewer = easier to build with |
+| Iterations to done | Fix cycles needed — fewer = less debugging |
+| Final feature score | Quality of the final app |
+| Lines of code | Code complexity — smaller = simpler for LLMs |
+| External dependencies | Infrastructure complexity |
+
+---
+
+## Troubleshooting
+
+### OTel Collector not receiving data
+
+```bash
+docker compose -f docker-compose.otel.yaml logs
+ls -la telemetry/logs.jsonl
+```
+
+### SpacetimeDB publish fails
+
+```bash
+spacetime server ping local
+spacetime start  # if not running
+```
+
+### Chrome MCP tools not working (grading session)
+
+- Chrome must be open before starting the grading session
+- "Claude in Chrome" extension must be installed and active
+- Only works in interactive Claude Code sessions (not `--print` mode)
+
+### Session runs out of context
+
+- Try a lower level first
+- The ITERATION_LOG.md preserves progress even if a session dies
+
+---
+
+## Running a Full Comparison
+
+### Sequential Upgrade (default)
+
+```bash
+# Generate level 1, then upgrade through each level
+./run.sh --level 1 --backend spacetime
+# (grade, fix loop...)
+./run.sh --upgrade <app-dir> --level 2
+# ... continue through level 12
+
+# Same for PostgreSQL
+./run.sh --level 1 --backend postgres
+# (grade, fix loop...)
+./run.sh --upgrade <app-dir> --level 2
+# ... continue through level 12
+```
+
+### One-Shot
+
+```bash
+# Generate all 15 features in a single prompt
+./run.sh --variant one-shot --backend spacetime
+./run.sh --variant one-shot --backend postgres
+```
+
+---
+
+## File Structure
+
+```
+llm-sequential-upgrade/
+  CLAUDE.md                        # Instructions for the Code Agent
+  DEVELOP.md                       # This file (for humans)
+  run.sh                           # Code Agent launcher (generate/fix/upgrade)
+  grade.sh                         # Grade Agent launcher (interactive Chrome MCP)
+  grade-playwright.sh              # Grade via Playwright (optional, deterministic)
+  docker-compose.otel.yaml         # OTel Collector container
+  otel-collector-config.yaml       # Collector config (OTLP → JSON files)
+  parse-telemetry.mjs              # Telemetry → COST_REPORT.md
+  backends/
+    spacetime.md                   # SpacetimeDB-specific phases
+    spacetime-sdk-rules.md         # SpacetimeDB SDK patterns
+    spacetime-templates.md         # Code templates
+    postgres.md                    # PostgreSQL-specific phases
+  test-plans/
+    feature-01-basic-chat.md       # Per-feature browser test scripts
+    ...
+    feature-15-anonymous-migration.md
+    playwright/                    # Optional Playwright test suite
+  telemetry/                       # Shared OTel Collector output
+  sequential-upgrade/              # Sequential upgrade test variant
+    sequential-upgrade-YYYYMMDD/   # Dated run with results, telemetry, inputs
+  one-shot/                        # One-shot test variant
+    one-shot-YYYYMMDD/
+```
+
+---
+
+## Architecture
+
+```
+                    TOKEN-TRACKED                      NOT TRACKED
+               ┌─────────────────────┐          ┌─────────────────────┐
+               │                     │          │                     │
+   run.sh ────▶│  Code Agent         │          │  Grade Agent        │◀──── You
+               │  (claude --print)   │          │  (interactive CC)   │      (in Claude Code)
+               │                     │          │                     │
+               │  • Generate code    │          │  • Chrome MCP       │
+               │  • Build & deploy   │   Bug    │  • Test features    │
+               │  • Fix bugs ◀───────│── Report │  • Score 0-3        │
+               │  • Redeploy         │──────────▶  • Write BUG_REPORT │
+               │                     │          │  • Write GRADING    │
+               └────────┬────────────┘          └─────────────────────┘
+                        │
+               OTel telemetry
+                        │
+               ┌────────▼────────────┐
+               │  OTel Collector     │
+               │  → logs.jsonl       │
+               │  → COST_REPORT.md   │
+               └─────────────────────┘
+```
diff --git a/tools/llm-sequential-upgrade/GRADING.md b/tools/llm-sequential-upgrade/GRADING.md
new file mode 100644
index 00000000000..0a0e02cf68e
--- /dev/null
+++ b/tools/llm-sequential-upgrade/GRADING.md
@@ -0,0 +1,122 @@
+# Grading Instructions
+
+This is the manual grading session. The app has already been generated and deployed by the automated run. Your job is to test every feature in the browser and score it.
+
+---
+
+## Setup — Two Independent Users
+
+You need TWO Chrome browser profiles so each user gets a completely separate identity (localStorage, cookies, WebSocket connections).
+
+1. **Browser A (default profile):** Navigate to the app URL and register as "Alice"
+   - SpacetimeDB: `http://localhost:6173`
+   - PostgreSQL: `http://localhost:6273`
+
+2. **Switch to Browser B:** Use `switch_browser` to switch to the second Chrome profile
+
+3. **Browser B:** Navigate to the SAME URL, register as "Bob"
+
+Use `switch_browser` to go back and forth. Both browsers connect to the same backend but have separate storage and WebSocket connections.
+
+---
+
+## Chrome MCP Tools
+
+- `navigate` — go to URL
+- `read_page` — accessibility tree for element discovery
+- `get_page_text` — visible text
+- `find` — natural language element search
+- `computer` — click, type, scroll, screenshot
+- `form_input` — fill form fields
+- `javascript_tool` — run JS for verification
+- `read_console_messages` — check for errors
+- `gif_creator` — record timing-sensitive features (typing indicators, ephemeral messages)
+
+### Adaptive Element Discovery
+
+Every generated app has different HTML. Use this fallback chain:
+1. `find("send message button")`
+2. `read_page` — identify by role/text
+3. `get_page_text`
+4. `javascript_tool` — query DOM directly
+
+---
+
+## Per-Feature Testing
+
+Read the test plan from `test-plans/feature-NN-*.md` for each feature. Test in order (1 through N).
+
+For each feature:
+1. Execute the test plan steps
+2. Record pass/fail for each criterion
+3. Screenshot at key verification points
+4. Check `read_console_messages` for JS errors
+5. Score 0–3 per the rubric below
+6. **IMMEDIATELY** write the score block to `GRADING_RESULTS.md` — do not wait until the end
+
+```markdown
+## Feature N: <Name> (Score: X / 3)
+- [x] <criterion> (1pt)
+- [ ] <criterion> (1pt)
+**Browser Test Observations:** ...
+---
+```
+
+---
+
+## Scoring Rules
+
+- Score ONLY from observed browser behavior — never from source code
+- If a criterion wasn't testable (UI didn't load, element not found), score 0
+- When in doubt, score lower
+- JavaScript console errors during a feature test cap that feature at 2/3
+- Real-time features that only work after page refresh cap at 1/3
+
+---
+
+## GRADING_RESULTS.md Format
+
+```markdown
+# Chat App Grading Results
+
+**Model:** Claude Sonnet 4.6
+**Date:** <YYYY-MM-DD>
+**Backend:** spacetime | postgres
+**Level:** <N>
+**Grading Method:** Manual browser interaction
+
+---
+
+## Feature 1: <Name> (Score: X / 3)
+- [x] <criterion> (1pt)
+...
+**Browser Test Observations:** ...
+
+---
+
+## Summary
+
+| Feature | Score | Notes |
+|---------|-------|-------|
+| 1. Basic Chat | X/3 | ... |
+...
+| **TOTAL** | **X/<3 × features graded>** | |
+```
+
+**Do NOT include token counts, cost estimates, or API call counts.** Cost data is in COST_REPORT.md.
+
+---
+
+## Reprompt Efficiency Reference
+
+| Reprompts | Score |
+|-----------|-------|
+| 0 | 10/10 |
+| 1 | 9/10 |
+| 2 | 8/10 |
+| 3 | 7/10 |
+| 4–5 | 6/10 |
+| 6–7 | 5/10 |
+| 8–10 | 4/10 |
+| 11–15 | 2/10 |
+| 16+ | 0/10 |
diff --git a/tools/llm-sequential-upgrade/GRADING_WORKFLOW.md b/tools/llm-sequential-upgrade/GRADING_WORKFLOW.md
new file mode 100644
index 00000000000..0088bac2a5a
--- /dev/null
+++ b/tools/llm-sequential-upgrade/GRADING_WORKFLOW.md
@@ -0,0 +1,140 @@
+# Grading Workflow
+
+How to grade generated apps and iterate on fixes.
+
+---
+
+## Overview
+
+```
+generate → you grade → report bugs → fix LLM fixes → you re-grade → repeat until done
+   ↑                                      ↑
+   token-tracked                          token-tracked
+```
+
+Code generation and fix iterations are token-tracked (the benchmark metric). Grading is manual and not tracked.
+
+---
+
+## Step 1: Generate
+
+```bash
+# One-shot, both backends, standard rules, level 7
+./run.sh --variant one-shot --level 7 --backend spacetime --rules standard --run-index 0
+./run.sh --variant one-shot --level 7 --backend postgres --rules standard --run-index 1
+```
+
+After generation, apps are running at:
+- **SpacetimeDB**: `http://localhost:5173` (run-index 0)
+- **PostgreSQL**: `http://localhost:5274` (run-index 1)
+
+Port offsets for parallel runs: run-index N uses ports `5173 + N*100` (spacetime) and `5174 + N*100` (postgres).
+
+---
+
+## Step 2: Grade
+
+Open each app in the browser. Test every feature at the current level.
+
+### Level 7 features (10 features, max 30 points):
+
+| # | Feature | What to check | Max |
+|---|---------|---------------|-----|
+| 1 | Basic Chat | Register with name, create room, send messages, see online users | 3 |
+| 2 | Typing Indicators | "is typing" shows when other user types, auto-expires after ~5s | 3 |
+| 3 | Read Receipts | "Seen by X" appears under messages after another user views them | 3 |
+| 4 | Unread Counts | Numeric badge on rooms with unread messages, clears when opened | 3 |
+| 5 | Scheduled Messages | Schedule button, time picker, pending list with cancel option | 3 |
+| 6 | Ephemeral Messages | Duration picker, countdown indicator, message auto-deletes | 3 |
+| 7 | Reactions | Emoji react button on hover, count updates, toggle on/off | 3 |
+| 8 | Message Editing | Edit button on own messages, "(edited)" indicator, edit history | 3 |
+| 9 | Permissions | Admin badge, kick/promote buttons, immediate effect | 3 |
+| 10 | Presence | Status selector (online/away/DND/invisible), colored dots | 3 |
+
+### Scoring
+
+- **3** = Fully works, no issues
+- **2** = Mostly works, minor issues (e.g., UI glitch but feature functional)
+- **1** = Partially implemented (e.g., button exists but doesn't do anything useful)
+- **0** = Missing or completely broken
+
+### Two-user features
+
+Features 2, 3, and parts of 1/4/9 need two users to fully test. Open two browser windows:
+- Window 1: register as Alice
+- Window 2: register as Bob (use incognito or a different browser profile for separate identity)
+
+If you can't test with two users, note which features were single-user tested only.
+
+---
+
+## Step 3: Report Bugs
+
+Tell Claude Code the bugs. Format:
+
+> **spacetime bugs:** typing indicators don't show, reactions button does nothing, app has no CSS styling, scheduled messages UI is just a raw checkbox
+
+Or more detailed:
+
+> **postgres bugs:**
+> - Feature 2: No typing indicator appears at all
+> - Feature 5: Schedule button exists but clicking it does nothing
+> - Feature 7: Emoji picker opens but selecting an emoji throws a console error
+> - General: Messages don't auto-scroll to bottom
+
+Claude Code will:
+1. Write `BUG_REPORT.md` in the app directory
+2. Run `./run.sh --fix <app-dir>` to launch the fix LLM
+3. Report when the fix is done
+
+The fix cost is token-tracked and adds to the benchmark total.
+
+---
+
+## Step 4: Re-grade
+
+After the fix completes, refresh the app in the browser and re-test the features that were broken.
+
+- If new bugs are found, report them → another fix iteration
+- If all features pass, you're done with this app
+
+---
+
+## Step 5: Record Results
+
+After all features pass (or you hit max iterations), the results are:
+
+- **Cost data**: automatically in `telemetry/*/cost-summary.json` (generation + all fix iterations)
+- **Grading results**: you provide the final scores
+
+Tell Claude Code:
+
+> **spacetime final scores:** F1=3, F2=2, F3=3, F4=3, F5=1, F6=3, F7=3, F8=2, F9=3, F10=3. Total: 26/30
+
+Claude Code will write the GRADING_RESULTS.md and generate the comparison report.
+
+---
+
+## Quick Reference
+
+| Action | Command |
+|--------|---------|
+| Generate (one-shot) | `./run.sh --variant one-shot --level 7 --backend spacetime --rules standard` |
+| Generate (sequential L1) | `./run.sh --level 1 --backend spacetime --rules standard` |
+| Upgrade to level N | `./run.sh --upgrade <app-dir> --level N --resume-session` |
+| Fix bugs | `./run.sh --fix <app-dir>` |
+| Parse telemetry | `node parse-telemetry.mjs <telemetry-dir> --logs-file=telemetry/logs.jsonl --extract-raw` |
+| Generate report | `node generate-report.mjs <run-base-dir>` |
+| Reset app state | `./reset-app.sh <app-dir>` |
+
+---
+
+## Feature Levels
+
+| Level | Features | Max Score |
+|-------|----------|-----------|
+| 1 | 1-4 (basic chat, typing, receipts, unread) | 12 |
+| 7 | 1-10 (+ scheduled, ephemeral, reactions, editing, permissions, presence) | 30 |
+| 12 | 1-15 (+ threading, private rooms, activity, drafts, anonymous migration) | 45 |
+| 15 | 1-18 (+ pinned, profiles, mentions/notifications) | 54 |
+| 19 | 1-22 (+ bookmarks, forwarding, slow mode, polls) | 66 |
diff --git a/tools/llm-sequential-upgrade/README.md b/tools/llm-sequential-upgrade/README.md
new file mode 100644
index 00000000000..0f2bd36ee72
--- /dev/null
+++ b/tools/llm-sequential-upgrade/README.md
@@ -0,0 +1,58 @@
+# LLM Sequential Upgrade Benchmark
+
+Automated benchmark harness for measuring AI app-generation cost, bug rate, and code size across backends. Designed to produce directly comparable data for the same app built on different stacks.
+
+Results viewer: https://spacetimedb.com/llms-benchmark-sequential-upgrade
+
+Generated test data (app source, telemetry, cost summaries): https://github.com/clockworklabs/spacetimedb-ai-test-results
+
+## What this measures
+
+For each backend under test, the harness drives a headless Claude Code session to:
+
+1. Generate a chat app from the L1 feature spec
+2. Upgrade through L2-L12 one feature group at a time
+3. After each level, a human grades the app against the feature spec
+4. Bugs are filed as `BUG_REPORT.md` and fixed via a separate Claude Code session
+5. All API costs are captured via OpenTelemetry and written to per-session cost summaries
+
+Side-by-side results give a direct comparison of AI-generation cost across backends for the same functional target.
+
+## Directory contents
+
+- `run.sh`: orchestrates generation, upgrade, and fix sessions. Supports `--upgrade`, `--fix`, `--composed-prompt`, `--resume-session`.
+- `grade.sh` / `grade-agents.sh` / `grade-playwright.sh`: grading harnesses (manual + automated)
+- `benchmark.sh` / `run-loop.sh`: batch runners for parallel or sequential benchmark execution
+- `cleanup.sh` / `reset-app.sh`: dev utilities
+- `benchmark-viewer.html`: local viewer for METRICS_DATA.json files (open in browser, drop JSON)
+- `generate-report.mjs`: aggregate per-session cost-summary.json into a markdown report
+- `parse-telemetry.mjs`: parse OTel log stream into per-session cost-summary.json
+- `parse-playwright-results.mjs`: convert Playwright JSON output to grading markdown
+- `docker-compose.otel.yaml` / `otel-collector-config.yaml`: OTel collector + PostgreSQL
+- `backends/`: per-backend setup / SDK reference documents given to the AI
+- `perf-benchmark/`: runtime throughput benchmark (msgs/sec) for the AI-generated apps
+- `CLAUDE.md` / `DEVELOP.md` / `GRADING.md` / `GRADING_WORKFLOW.md`: process documentation
+
+## Running a benchmark
+
+```bash
+# Prereqs: Claude CLI installed, Docker running, SpacetimeDB installed
+docker compose -f docker-compose.otel.yaml up -d
+
+# Generate L1 from scratch
+./run.sh --backend spacetime --level 1
+./run.sh --backend postgres --level 1
+
+# Upgrade through levels
+./run.sh --upgrade <app-dir> --level 2 --composed-prompt
+# ... continue through L12
+
+# Fix bugs found during grading
+./run.sh --fix <app-dir> --level N
+```
+
+Generated apps and telemetry land in `sequential-upgrade/sequential-upgrade-<timestamp>/` locally. For published test data from canonical runs, see the [AI Test Results repo](https://github.com/clockworklabs/spacetimedb-ai-test-results).
+
+## Performance benchmark
+
+`perf-benchmark/` contains a runtime stress tool that fires concurrent writers against a generated app's `send_message` handler to measure sustained throughput in messages/sec. See `perf-benchmark/README.md` for usage.
diff --git a/tools/llm-sequential-upgrade/backends/postgres.md b/tools/llm-sequential-upgrade/backends/postgres.md
new file mode 100644
index 00000000000..f65246cc0d2
--- /dev/null
+++ b/tools/llm-sequential-upgrade/backends/postgres.md
@@ -0,0 +1,314 @@
+# Backend: PostgreSQL
+
+Instructions for generating, building, and deploying the **PostgreSQL** backend.
+
+**Do NOT read SpacetimeDB SDK rule files.** This backend uses standard Node.js/TypeScript patterns.
+
+---
+
+## Architecture
+
+- **Server:** Node.js + Express + Drizzle ORM + Socket.io
+- **Client:** React + Vite + TypeScript + Socket.io-client
+- **Database:** PostgreSQL (running in Docker)
+
+The server handles:
+- REST API endpoints for CRUD operations
+- Socket.io for real-time events (messages, typing, presence, etc.)
+- Drizzle ORM for database queries
+- Session/identity management
+
+---
+
+## PostgreSQL Connection
+
+PostgreSQL is already running in a Docker container.
+
+| Parameter | Value |
+|-----------|-------|
+| Host | `localhost` |
+| Port | `6432` (mapped from container 5432) |
+| User | `spacetime` |
+| Password | `spacetime` |
+| Database | `spacetime` |
+| Container | `spacetime-web-postgres-1` |
+| Connection URL | `postgresql://spacetime:spacetime@localhost:6432/spacetime` |
+
+---
+
+## Pre-flight Check
+
+```bash
+docker exec spacetime-web-postgres-1 psql -U spacetime -d spacetime -c "SELECT 1"
+```
+
+If PostgreSQL is not reachable, STOP and report the error.
+
+---
+
+## Directory Structure
+
+```
+<app-dir>/
+  server/
+    package.json
+    tsconfig.json
+    drizzle.config.ts
+    .env
+    src/
+      schema.ts      # Drizzle ORM table definitions
+      index.ts       # Express server + Socket.io + routes
+  client/
+    package.json
+    vite.config.ts
+    tsconfig.json
+    index.html
+    src/
+      main.tsx       # React entry point
+      App.tsx        # Main application component
+      styles.css     # Dark theme styling
+```
+
+---
+
+## Phase 1: Generate Server
+
+Create the Express + Socket.io server:
+
+- `server/package.json`:
+  ```json
+  {
+    "name": "chat-server",
+    "type": "module",
+    "scripts": {
+      "dev": "tsx watch src/index.ts",
+      "start": "tsx src/index.ts"
+    },
+    "dependencies": {
+      "express": "^4.18.2",
+      "@types/express": "^4.17.21",
+      "drizzle-orm": "^0.39.0",
+      "pg": "^8.13.0",
+      "@types/pg": "^8.11.0",
+      "socket.io": "^4.7.4",
+      "cors": "^2.8.5",
+      "@types/cors": "^2.8.17",
+      "dotenv": "^16.4.5",
+      "drizzle-kit": "^0.30.0",
+      "tsx": "^4.19.0",
+      "typescript": "^5.4.0"
+    }
+  }
+  ```
+
+- `server/tsconfig.json`:
+  ```json
+  {
+    "compilerOptions": {
+      "target": "ES2022",
+      "module": "ES2022",
+      "moduleResolution": "bundler",
+      "esModuleInterop": true,
+      "strict": true,
+      "outDir": "dist",
+      "rootDir": "src",
+      "skipLibCheck": true
+    },
+    "include": ["src/**/*"]
+  }
+  ```
+
+- `server/.env`:
+  ```
+  DATABASE_URL=postgresql://spacetime:spacetime@localhost:6432/spacetime
+  PORT=6001
+  ```
+
+- `server/drizzle.config.ts`:
+  ```typescript
+  import { defineConfig } from 'drizzle-kit';
+
+  export default defineConfig({
+    schema: './src/schema.ts',
+    out: './drizzle',
+    dialect: 'postgresql',
+    dbCredentials: {
+      url: process.env.DATABASE_URL || 'postgresql://spacetime:spacetime@localhost:6432/spacetime',
+    },
+  });
+  ```
+
+- `server/src/schema.ts` — Drizzle ORM table definitions for all features
+- `server/src/index.ts` — Express server with:
+  - CORS configured for `http://localhost:6273`
+  - Socket.io with CORS
+  - REST endpoints for the app's resources (per the feature spec)
+  - Socket.io events for real-time updates (per the feature spec)
+  - Database queries via Drizzle ORM
+
+Install and push schema:
+```bash
+cd <server-dir> && npm install
+npx drizzle-kit push
+```
+
+---
+
+## Phase 2: (No bindings step)
+
+Skip — PostgreSQL has no binding generation. The client calls REST/Socket.io APIs directly.
+
+---
+
+## Phase 3: Generate Client
+
+- `client/package.json`:
+  ```json
+  {
+    "name": "chat-client",
+    "type": "module",
+    "scripts": {
+      "dev": "vite",
+      "build": "tsc -b && vite build"
+    },
+    "dependencies": {
+      "react": "^18.3.1",
+      "react-dom": "^18.3.1",
+      "socket.io-client": "^4.7.4"
+    },
+    "devDependencies": {
+      "@types/react": "^18.3.12",
+      "@types/react-dom": "^18.3.1",
+      "@vitejs/plugin-react": "^4.3.4",
+      "typescript": "^5.4.0",
+      "vite": "^6.0.0"
+    }
+  }
+  ```
+
+- `client/vite.config.ts` — port **6273** (NOT 6173 — that's SpacetimeDB), proxy `/api` and `/socket.io` to `http://localhost:6001`
+  ```typescript
+  import { defineConfig } from 'vite';
+  import react from '@vitejs/plugin-react';
+
+  export default defineConfig({
+    plugins: [react()],
+    server: {
+      port: 6273,
+      proxy: {
+        '/api': 'http://localhost:6001',
+        '/socket.io': {
+          target: 'http://localhost:6001',
+          ws: true,
+        },
+      },
+    },
+  });
+  ```
+
+- `client/tsconfig.json`
+- `client/index.html`
+- `client/src/main.tsx` — React entry point
+- `client/src/App.tsx` — Main component using `fetch('/api/...')` + Socket.io client
+- `client/src/styles.css` — Dark theme styling
+
+**The client connects to the server via the Vite proxy** — no hardcoded localhost:6001 in client code.
+
+**Critical:** Initialize the socket.io client without a hardcoded URL so it routes through the Vite proxy (e.g. `io()` or `io({ path: '/socket.io' })`). Hardcoding `http://localhost:6001` bypasses the proxy and breaks WebSocket upgrades.
+
+---
+
+## Phase 4: Verify
+
+```bash
+# Server
+cd <server-dir> && npm install && npx tsc --noEmit
+
+# Client
+cd <client-dir> && npm install && npx tsc --noEmit && npm run build
+```
+
+Both must pass. If either fails:
+1. Read the error
+2. Fix the code
+3. Retry (up to 3 attempts)
+4. Each fix counts as a **reprompt** — log it
+
+---
+
+## Phase 5: Deploy
+
+```bash
+# Kill any existing servers
+npx kill-port 6273 2>/dev/null || true
+npx kill-port 6001 2>/dev/null || true
+
+# Start the API server in background
+cd <server-dir> && npx tsx src/index.ts &
+
+# Wait for API server to be ready (poll http://localhost:6001 up to 30s)
+
+# Start client dev server in background
+cd <client-dir> && npm run dev &
+```
+
+Wait for both servers to be ready:
+- API server at `http://localhost:6001`
+- Client dev server at `http://localhost:6273`
+
+---
+
+## Redeploy (for fix iterations)
+
+- If **server changed**: kill and restart the Express server
+  ```bash
+  npx kill-port 6001 2>/dev/null || true
+  cd <server-dir> && npx tsx src/index.ts &
+  ```
+- If **schema changed**: push new schema before restarting
+  ```bash
+  cd <server-dir> && npx drizzle-kit push
+  ```
+- If **client changed**: Vite HMR handles it automatically (or restart dev server if needed)
+
+---
+
+## Key Differences from SpacetimeDB
+
+For context on what makes this backend different (this helps the benchmark comparison):
+
+| Aspect | SpacetimeDB | PostgreSQL |
+|--------|-------------|------------|
+| Real-time | Built-in subscriptions | Socket.io (manual) |
+| API layer | Reducers (auto-exposed) | Express routes (manual) |
+| Schema | `table()` + `reducer()` | Drizzle `pgTable()` |
+| Bindings | Auto-generated types | Manual type definitions |
+| Deployment | `spacetime publish` | Start Express server |
+| State sync | Automatic client cache | Manual fetch + Socket.io |
+| Online presence | Via lifecycle hooks | Manual Socket.io tracking |
+| Typing indicators | Reducer + subscription | Socket.io events |
+| Infra dependencies | SpacetimeDB only | PostgreSQL + Express + Socket.io + CORS |
+
+---
+
+## App Identity
+
+- HTML `<title>` MUST be **"PostgreSQL Chat"** (not "Chat App", not "SpacetimeDB Chat")
+- The app MUST show **"PostgreSQL Chat"** as the visible header/title in the UI
+- This distinguishes it from the SpacetimeDB version during testing
+
+---
+
+## Port Configuration
+
+| Service | Port | Notes |
+|---------|------|-------|
+| PostgreSQL (Docker) | 6432 | Database |
+| Express API server | 6001 | REST + Socket.io |
+| Vite dev server | **6273** | React client — NOT 6173 (that's SpacetimeDB) |
+
+---
+
+## Reference Files
+
+The language and feature prompt files are provided as absolute paths in the launch prompt. No additional reference files are needed — this backend uses standard Node.js/TypeScript patterns.
diff --git a/tools/llm-sequential-upgrade/backends/spacetime-sdk-rules.md b/tools/llm-sequential-upgrade/backends/spacetime-sdk-rules.md
new file mode 100644
index 00000000000..337af9269a4
--- /dev/null
+++ b/tools/llm-sequential-upgrade/backends/spacetime-sdk-rules.md
@@ -0,0 +1,258 @@
+# SpacetimeDB TypeScript SDK Reference
+
+## Imports
+
+```typescript
+import { schema, table, t } from 'spacetimedb/server';
+import { SenderError } from 'spacetimedb/server';
+import { ScheduleAt } from 'spacetimedb';        // for scheduled tables only
+```
+
+## Tables
+
+`table(OPTIONS, COLUMNS)` — two arguments. The `name` field MUST be snake_case:
+
+```typescript
+const entity = table(
+  { name: 'entity', public: true },
+  {
+    identity: t.identity().primaryKey(),
+    name: t.string(),
+    active: t.bool(),
+  }
+);
+```
+
+Options: `name` (snake_case, required), `public: true`, `event: true`, `scheduled: (): any => reducerRef`, `indexes: [...]`
+
+`ctx.db` accessors use the JS variable name (camelCase), not the SQL name.
+
+## Column Types
+
+| Builder | JS type | Notes |
+|---------|---------|-------|
+| `t.u64()` | bigint | Use `0n` literals |
+| `t.i64()` | bigint | Use `0n` literals |
+| `t.u32()` / `t.i32()` | number | |
+| `t.f64()` / `t.f32()` | number | |
+| `t.bool()` | boolean | |
+| `t.string()` | string | |
+| `t.identity()` | Identity | |
+| `t.timestamp()` | Timestamp | |
+| `t.scheduleAt()` | ScheduleAt | |
+
+Modifiers: `.primaryKey()`, `.autoInc()`, `.unique()`, `.index('btree')`
+
+Optional columns: `nickname: t.option(t.string())`
+
+## Indexes
+
+Prefer inline `.index('btree')` for single-column. Use named indexes only for multi-column:
+
+```typescript
+// Inline (preferred):
+authorId: t.u64().index('btree'),
+// Access: ctx.db.post.authorId.filter(authorId);
+
+// Multi-column (named):
+indexes: [{ accessor: 'by_cat_sev', algorithm: 'btree', columns: ['category', 'severity'] }]
+```
+
+## Schema Export
+
+```typescript
+const spacetimedb = schema({ entity, record });  // ONE object, not spread args
+export default spacetimedb;
+```
+
+## Reducers
+
+Export name becomes the reducer name:
+
+```typescript
+export const createEntity = spacetimedb.reducer(
+  { name: t.string(), age: t.i32() },
+  (ctx, { name, age }) => {
+    ctx.db.entity.insert({ identity: ctx.sender, name, age, active: true });
+  }
+);
+
+// No arguments — just the callback:
+export const doReset = spacetimedb.reducer((ctx) => { ... });
+```
+
+## DB Operations
+
+```typescript
+ctx.db.entity.insert({ id: 0n, name: 'Sample' });          // Insert (0n for autoInc)
+ctx.db.entity.id.find(entityId);                           // Find by PK → row | null
+ctx.db.entity.identity.find(ctx.sender);                   // Find by unique column
+[...ctx.db.item.authorId.filter(authorId)];                // Filter → spread to Array
+[...ctx.db.entity.iter()];                                 // All rows → Array
+ctx.db.entity.id.update({ ...existing, name: newName });   // Update (spread + override)
+ctx.db.entity.id.delete(entityId);                         // Delete by PK
+```
+
+Note: `iter()` and `filter()` return iterators. Spread to Array for `.sort()`, `.filter()`, `.map()`.
+
+## Lifecycle Hooks
+
+MUST be `export const` — bare calls are silently ignored:
+
+```typescript
+export const init = spacetimedb.init((ctx) => { ... });
+export const onConnect = spacetimedb.clientConnected((ctx) => { ... });
+export const onDisconnect = spacetimedb.clientDisconnected((ctx) => { ... });
+```
+
+## Authentication & Timestamps
+
+```typescript
+// Auth: ctx.sender is the caller's Identity
+if (!row.owner.equals(ctx.sender)) throw new SenderError('unauthorized');
+
+// Server timestamps
+ctx.db.item.insert({ id: 0n, createdAt: ctx.timestamp });
+
+// Client: Timestamp → Date
+new Date(Number(row.createdAt.microsSinceUnixEpoch / 1000n));
+```
+
+## Scheduled Tables
+
+```typescript
+const tickTimer = table({
+  name: 'tick_timer',
+  scheduled: (): any => tick,   // (): any => breaks circular dep
+}, {
+  scheduledId: t.u64().primaryKey().autoInc(),
+  scheduledAt: t.scheduleAt(),
+});
+
+export const tick = spacetimedb.reducer(
+  { timer: tickTimer.rowType },
+  (ctx, { timer }) => { /* timer row auto-deleted after this runs */ }
+);
+
+// One-time: ScheduleAt.time(ctx.timestamp.microsSinceUnixEpoch + delayMicros)
+// Repeating: ScheduleAt.interval(60_000_000n)
+```
+
+## React Client
+
+### main.tsx — SpacetimeDBProvider is required
+
+```typescript
+import React, { useMemo } from 'react';
+import ReactDOM from 'react-dom/client';
+import { SpacetimeDBProvider } from 'spacetimedb/react';
+import { DbConnection } from './module_bindings';
+import { MODULE_NAME, SPACETIMEDB_URI } from './config';
+import App from './App';
+
+function Root() {
+  const connectionBuilder = useMemo(() =>
+    DbConnection.builder()
+      .withUri(SPACETIMEDB_URI)
+      .withDatabaseName(MODULE_NAME)
+      .withToken(localStorage.getItem('auth_token') || undefined),
+    []
+  );
+  return (
+    <SpacetimeDBProvider connectionBuilder={connectionBuilder}>
+      <App />
+    </SpacetimeDBProvider>
+  );
+}
+
+ReactDOM.createRoot(document.getElementById('root')!).render(<Root />);
+```
+
+### App.tsx patterns
+
+```typescript
+import { useTable, useSpacetimeDB } from 'spacetimedb/react';
+import { DbConnection, tables } from './module_bindings';
+
+function App() {
+  const { isActive, identity: myIdentity, token, getConnection } = useSpacetimeDB();
+  const conn = getConnection() as DbConnection | null;
+
+  // Save auth token
+  useEffect(() => { if (token) localStorage.setItem('auth_token', token); }, [token]);
+
+  // Subscribe when connected
+  useEffect(() => {
+    if (!conn || !isActive) return;
+    conn.subscriptionBuilder()
+      .onApplied(() => setSubscribed(true))
+      .subscribe(['SELECT * FROM entity', 'SELECT * FROM record']);
+  }, [conn, isActive]);
+
+  // Reactive data
+  const [entities] = useTable(tables.entity);
+  const [records] = useTable(tables.record);
+
+  // Call reducers with object syntax
+  conn?.reducers.addRecord({ data });
+
+  // Compare identities
+  const isMe = row.owner.toHexString() === myIdentity?.toHexString();
+}
+```
+
+## Complete Example
+
+```typescript
+// schema.ts
+import { schema, table, t } from 'spacetimedb/server';
+
+const entity = table({ name: 'entity', public: true }, {
+  identity: t.identity().primaryKey(),
+  name: t.string(),
+  active: t.bool(),
+});
+
+const record = table({ name: 'record', public: true }, {
+  id: t.u64().primaryKey().autoInc(),
+  owner: t.identity(),
+  value: t.u32(),
+  createdAt: t.timestamp(),
+});
+
+const spacetimedb = schema({ entity, record });
+export default spacetimedb;
+```
+
+```typescript
+// index.ts
+import spacetimedb from './schema';
+import { t, SenderError } from 'spacetimedb/server';
+export { default } from './schema';
+
+export const onConnect = spacetimedb.clientConnected((ctx) => {
+  const existing = ctx.db.entity.identity.find(ctx.sender);
+  if (existing) ctx.db.entity.identity.update({ ...existing, active: true });
+});
+
+export const onDisconnect = spacetimedb.clientDisconnected((ctx) => {
+  const existing = ctx.db.entity.identity.find(ctx.sender);
+  if (existing) ctx.db.entity.identity.update({ ...existing, active: false });
+});
+
+export const createEntity = spacetimedb.reducer(
+  { name: t.string() },
+  (ctx, { name }) => {
+    if (ctx.db.entity.identity.find(ctx.sender)) throw new SenderError('already exists');
+    ctx.db.entity.insert({ identity: ctx.sender, name, active: true });
+  }
+);
+
+export const addRecord = spacetimedb.reducer(
+  { value: t.u32() },
+  (ctx, { value }) => {
+    if (!ctx.db.entity.identity.find(ctx.sender)) throw new SenderError('not found');
+    ctx.db.record.insert({ id: 0n, owner: ctx.sender, value, createdAt: ctx.timestamp });
+  }
+);
+```
diff --git a/tools/llm-sequential-upgrade/backends/spacetime-templates.md b/tools/llm-sequential-upgrade/backends/spacetime-templates.md
new file mode 100644
index 00000000000..0847c58f21a
--- /dev/null
+++ b/tools/llm-sequential-upgrade/backends/spacetime-templates.md
@@ -0,0 +1,141 @@
+# SpacetimeDB File Templates
+
+## Backend Templates
+
+### backend/spacetimedb/package.json
+```json
+{
+  "name": "chat-app-backend",
+  "type": "module",
+  "version": "1.0.0",
+  "dependencies": {
+    "spacetimedb": "^2.0.0"
+  }
+}
+```
+
+### backend/spacetimedb/tsconfig.json
+```json
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "module": "ESNext",
+    "moduleResolution": "node",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "outDir": "./dist"
+  },
+  "include": ["src/**/*"]
+}
+```
+
+### File Organization
+```
+src/schema.ts   -> All tables, indexes, export spacetimedb
+src/index.ts    -> Import schema, define all reducers and lifecycle hooks
+```
+
+Why this structure? Avoids circular dependency issues between tables and reducers.
+
+---
+
+## Client Templates
+
+### client/package.json
+```json
+{
+  "name": "chat-app-client",
+  "private": true,
+  "version": "1.0.0",
+  "type": "module",
+  "scripts": {
+    "kill-port": "npx kill-port 6173 2>nul || true",
+    "dev": "npm run kill-port && vite",
+    "build": "tsc && vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "spacetimedb": "^2.0.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.3.18",
+    "@types/react-dom": "^18.3.5",
+    "@vitejs/plugin-react": "^4.3.4",
+    "typescript": "^5.7.2",
+    "vite": "^6.0.3"
+  }
+}
+```
+
+### client/vite.config.ts
+```typescript
+import { defineConfig } from 'vite';
+import react from '@vitejs/plugin-react';
+
+export default defineConfig({
+  plugins: [react()],
+  server: {
+    port: 6173,  // NEVER use 3000 — conflicts with SpacetimeDB
+  },
+});
+```
+
+### client/tsconfig.json
+```json
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "lib": ["ES2020", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "noEmit": true,
+    "jsx": "react-jsx",
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true
+  },
+  "include": ["src"]
+}
+```
+
+### client/index.html
+```html
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>SpacetimeDB Chat</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
+```
+
+### client/src/config.ts
+```typescript
+export const MODULE_NAME = 'chat-app-TIMESTAMP';  // Replace TIMESTAMP with actual module name
+export const SPACETIMEDB_URI = 'ws://localhost:3000';
+```
+
+---
+
+## Port Configuration
+
+| Service | Port | Notes |
+|---------|------|-------|
+| SpacetimeDB server | 3000 | WebSocket connections |
+| Vite dev server | 6173 | React client |
+
+**Never run Vite on port 3000** — it conflicts with SpacetimeDB.
diff --git a/tools/llm-sequential-upgrade/backends/spacetime.md b/tools/llm-sequential-upgrade/backends/spacetime.md
new file mode 100644
index 00000000000..891206b8090
--- /dev/null
+++ b/tools/llm-sequential-upgrade/backends/spacetime.md
@@ -0,0 +1,130 @@
+# Backend: SpacetimeDB
+
+Instructions for generating, building, and deploying the **SpacetimeDB** backend.
+
+---
+
+## Pre-flight Check
+
+```bash
+spacetime server ping local
+```
+
+If SpacetimeDB is not running, STOP and report the error.
+
+---
+
+## Directory Structure
+
+```
+<app-dir>/
+  backend/spacetimedb/
+    package.json
+    tsconfig.json
+    src/
+      schema.ts      # All tables and indexes
+      index.ts       # All reducers and lifecycle hooks
+  client/
+    package.json
+    vite.config.ts
+    tsconfig.json
+    index.html
+    src/
+      config.ts      # Module name and SpacetimeDB URI
+      main.tsx       # React entry point
+      App.tsx        # Main application component
+      styles.css     # Dark theme styling
+      module_bindings/  # Auto-generated (Phase 2)
+```
+
+---
+
+## Phase 1: Generate Backend
+
+- Create `backend/spacetimedb/package.json` (use the template in the "Backend Templates" section of `spacetime-templates.md`)
+- Create `backend/spacetimedb/tsconfig.json` (use the template in the "Backend Templates" section of `spacetime-templates.md`)
+- Create `backend/spacetimedb/src/schema.ts` — all tables and indexes
+- Create `backend/spacetimedb/src/index.ts` — all reducers and lifecycle hooks
+- Install and publish:
+  ```bash
+  cd <backend-dir> && npm install
+  spacetime publish chat-app-<timestamp> --module-path <backend-dir>
+  ```
+
+**Module naming:** Use the timestamped folder name as the module name (e.g. `chat-app-20260330-143000`).
+
+---
+
+## Phase 2: Generate Bindings
+
+```bash
+spacetime generate --lang typescript --out-dir <client>/src/module_bindings --module-path <backend-dir>
+```
+
+Read the generated bindings to know the exact type names (table names, reducer signatures) before writing client code.
+
+---
+
+## Phase 3: Generate Client
+
+Generate client files using the REAL binding types from Phase 2.
+
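+- Create `client/package.json` (use the template in the "Client Templates" section of `spacetime-templates.md`)
+- Create `client/vite.config.ts` (use the template in the "Client Templates" section of `spacetime-templates.md`)
+- Create `client/tsconfig.json` (use the template in the "Client Templates" section of `spacetime-templates.md`)
+- Create `client/index.html` (use the template in the "Client Templates" section of `spacetime-templates.md`)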
+- Create `client/src/config.ts` — module name and SpacetimeDB URI
+- Create `client/src/main.tsx` — React entry point
+- Create `client/src/App.tsx` — main application component
+- Create `client/src/styles.css` — dark theme styling
+
+**CRITICAL:** Import from `./module_bindings` using the REAL generated type names, not guessed ones.
+
+---
+
+## Phase 4: Verify
+
+```bash
+cd <client-dir> && npm install
+npx tsc --noEmit          # Type-check
+npm run build             # Full production build
+```
+
+Both the type-check and the production build must pass. If either fails:
+1. Read the error
+2. Fix the code
+3. Retry (up to 3 attempts)
+4. Each fix counts as a **reprompt** — log it
+
+---
+
+## Phase 5: Deploy
+
+```bash
+# Kill any existing dev server
+npx kill-port 6173 2>/dev/null || true
+
+# Start dev server in background
+cd <client-dir> && npm run dev &
+```
+
+Wait for the dev server to be ready (poll `http://localhost:6173` for up to 30 seconds).
+
+---
+
+## App Identity
+
+- HTML `<title>` MUST be **"SpacetimeDB Chat"** (not "Chat App" or anything generic)
+- The app MUST show **"SpacetimeDB Chat"** as the visible header/title in the UI
+- This distinguishes it from the PostgreSQL version during testing
+
+---
+
+## Redeploy (for fix iterations)
+
+- If **backend changed**: re-publish module, regenerate bindings if schema changed
+  ```bash
+  spacetime publish chat-app-<timestamp> --module-path <backend-dir>
+  spacetime generate --lang typescript --out-dir <client>/src/module_bindings --module-path <backend-dir>
+  ```
+- If **client changed**: Vite HMR handles it automatically (or restart dev server if needed)
diff --git a/tools/llm-sequential-upgrade/benchmark-viewer.html b/tools/llm-sequential-upgrade/benchmark-viewer.html
new file mode 100644
index 00000000000..8b232482774
--- /dev/null
+++ b/tools/llm-sequential-upgrade/benchmark-viewer.html
@@ -0,0 +1,574 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1.0" />
+<title>SpacetimeDB vs PostgreSQL — Benchmark Viewer</title>
+<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
+<style>
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { background: #0d0d0e; color: #e6e9f0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; padding: 24px; }
+  h1 { font-size: 22px; font-weight: 700; margin-bottom: 4px; background: linear-gradient(266deg, #4cf490 0%, #8a38f5 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; }
+  .subtitle { color: #6f7987; font-size: 13px; margin-bottom: 12px; }
+  .toolbar { display: flex; align-items: center; gap: 10px; margin-bottom: 14px; flex-wrap: wrap; }
+  .toolbar-label { font-size: 12px; color: #6f7987; }
+  .btn { display: inline-flex; align-items: center; gap: 6px; background: #141416; border: 1px solid #3a3a42; border-radius: 6px; padding: 6px 12px; cursor: pointer; font-size: 12px; color: #e6e9f0; transition: border-color 0.15s, color 0.15s; white-space: nowrap; }
+  .btn:hover { border-color: #4cf490; color: #4cf490; }
+  .file-label { display: inline-flex; align-items: center; gap: 6px; }
+  .file-label input { display: none; }
+  /* Run list panel */
+  #run-panel { display: none; background: #141416; border: 1px solid #202126; border-radius: 8px; margin-bottom: 16px; overflow: hidden; }
+  #run-panel-header { display: flex; align-items: center; justify-content: space-between; padding: 10px 14px; border-bottom: 1px solid #202126; }
+  #run-panel-title { font-size: 12px; font-weight: 600; color: #6f7987; text-transform: uppercase; letter-spacing: 0.05em; }
+  #run-panel-close { background: none; border: none; color: #6f7987; cursor: pointer; font-size: 16px; line-height: 1; padding: 0 2px; }
+  #run-panel-close:hover { color: #e6e9f0; }
+  #run-list { list-style: none; max-height: 240px; overflow-y: auto; }
+  #run-list li { display: flex; align-items: baseline; gap: 10px; padding: 9px 14px; border-bottom: 1px solid #181820; cursor: pointer; transition: background 0.1s; }
+  #run-list li:last-child { border-bottom: none; }
+  #run-list li:hover { background: #1e1e24; }
+  #run-list li.active { background: #1a2a1e; }
+  #run-list li.active .run-name { color: #4cf490; }
+  .run-name { font-size: 13px; color: #e6e9f0; font-weight: 500; }
+  .run-path { font-size: 11px; color: #6f7987; }
+  #run-scanning { padding: 12px 14px; font-size: 12px; color: #6f7987; }
+  #loaded-file { font-size: 12px; color: #4cf490; }
+  #error-banner { display: none; background: #2a1010; border: 1px solid #5a2020; border-radius: 8px; padding: 12px 16px; color: #f87171; font-size: 12px; margin-bottom: 12px; }
+  #error-banner code { background: #1a1010; padding: 2px 5px; border-radius: 3px; }
+  #empty-state { display: none; flex-direction: column; align-items: center; justify-content: center; gap: 16px; padding: 60px 24px; border: 1px dashed #2a2a32; border-radius: 12px; margin-bottom: 28px; text-align: center; }
+  #empty-state .empty-icon { font-size: 40px; }
+  #empty-state h3 { font-size: 16px; font-weight: 600; color: #e6e9f0; }
+  #empty-state p { font-size: 13px; color: #6f7987; max-width: 420px; line-height: 1.6; }
+  #empty-state kbd { background: #1e1e24; border: 1px solid #3a3a42; border-radius: 4px; padding: 2px 7px; font-size: 12px; font-family: inherit; color: #e6e9f0; }
+  .summary-bar { display: flex; gap: 12px; flex-wrap: wrap; margin-bottom: 28px; }
+  .stat { background: #141416; border: 1px solid #202126; border-radius: 8px; padding: 14px 18px; flex: 1; min-width: 140px; }
+  .stat-label { font-size: 11px; color: #6f7987; text-transform: uppercase; letter-spacing: 0.05em; margin-bottom: 6px; }
+  .stat-row { display: flex; gap: 16px; }
+  .stat-val { font-size: 20px; font-weight: 700; }
+  .stat-val.stdb { color: #4cf490; }
+  .stat-val.pg { color: #336791; }
+  .stat-sub { font-size: 11px; color: #6f7987; margin-top: 2px; }
+  .grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(480px, 1fr)); gap: 20px; }
+  .card { background: #141416; border: 1px solid #202126; border-radius: 10px; padding: 20px; }
+  .card h2 { font-size: 13px; font-weight: 600; color: #6f7987; text-transform: uppercase; letter-spacing: 0.06em; margin-bottom: 16px; }
+  .chart-wrap { position: relative; height: 240px; }
+  .legend { display: flex; gap: 16px; margin-bottom: 12px; }
+  .legend-item { display: flex; align-items: center; gap: 6px; font-size: 12px; color: #e6e9f0; }
+  .legend-dot { width: 10px; height: 10px; border-radius: 50%; }
+  .note { font-size: 11px; color: #6f7987; margin-top: 10px; font-style: italic; }
+  .full-width { grid-column: 1 / -1; }
+</style>
+</head>
+<body>
+
+<h1>SpacetimeDB vs PostgreSQL — AI App Generation Benchmark</h1>
+<p class="subtitle">Sequential upgrade · L1–L11 · 13 feature groups · Claude Sonnet 4.6 · Chat app</p>
+
+<div class="toolbar">
+  <span class="toolbar-label">Load data:</span>
+  <button class="btn" id="btn-dir" title="Browse a folder — skips node_modules (Chrome/Edge)">📁 Open folder…</button>
+  <label class="btn file-label" title="Pick a single METRICS_DATA*.json file">
+    <input type="file" id="file-picker" accept=".json">
+    📄 Open file…
+  </label>
+  <span id="loaded-file"></span>
+</div>
+
+<div id="run-panel">
+  <div id="run-panel-header">
+    <span id="run-panel-title">Runs found</span>
+    <button id="run-panel-close" title="Close">✕</button>
+  </div>
+  <div id="run-scanning" style="display:none">Scanning…</div>
+  <ul id="run-list"></ul>
+</div>
+
+<div id="error-banner"></div>
+
+<div id="empty-state">
+  <div class="empty-icon">📊</div>
+  <h3>No run loaded</h3>
+  <p>
+    Click <kbd>📁 Open folder…</kbd> to select the <code>llm-sequential-upgrade/</code> directory —
+    the viewer will find all <code>METRICS_DATA*.json</code> files inside and list them for selection.<br><br>
+    Or click <kbd>📄 Open file…</kbd> to load a single file directly.
+  </p>
+</div>
+
+<div class="summary-bar">
+  <div class="stat">
+    <div class="stat-label">Total Cost</div>
+    <div class="stat-row">
+      <div><div class="stat-val stdb" id="stat-stdb-cost">—</div><div class="stat-sub">SpacetimeDB</div></div>
+      <div><div class="stat-val pg"   id="stat-pg-cost">—</div><div class="stat-sub">PostgreSQL</div></div>
+    </div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Total Bugs</div>
+    <div class="stat-row">
+      <div><div class="stat-val stdb" id="stat-stdb-bugs">—</div><div class="stat-sub">SpacetimeDB</div></div>
+      <div><div class="stat-val pg"   id="stat-pg-bugs">—</div><div class="stat-sub">PostgreSQL</div></div>
+    </div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Fix Sessions</div>
+    <div class="stat-row">
+      <div><div class="stat-val stdb" id="stat-stdb-sessions">—</div><div class="stat-sub">SpacetimeDB</div></div>
+      <div><div class="stat-val pg"   id="stat-pg-sessions">—</div><div class="stat-sub">PostgreSQL</div></div>
+    </div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Final LOC (hand-written)</div>
+    <div class="stat-row">
+      <div><div class="stat-val stdb" id="stat-stdb-loc">—</div><div class="stat-sub">SpacetimeDB</div></div>
+      <div><div class="stat-val pg"   id="stat-pg-loc">—</div><div class="stat-sub">PostgreSQL</div></div>
+    </div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Total Time</div>
+    <div class="stat-row">
+      <div><div class="stat-val stdb" id="stat-stdb-time">—</div><div class="stat-sub">SpacetimeDB</div></div>
+      <div><div class="stat-val pg"   id="stat-pg-time">—</div><div class="stat-sub">PostgreSQL</div></div>
+    </div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Fix Success (1st attempt)</div>
+    <div class="stat-row">
+      <div><div class="stat-val stdb" id="stat-stdb-success">—</div><div class="stat-sub">SpacetimeDB</div></div>
+      <div><div class="stat-val pg"   id="stat-pg-success">—</div><div class="stat-sub">PostgreSQL</div></div>
+    </div>
+  </div>
+</div>
+
+<div class="legend">
+  <div class="legend-item"><div class="legend-dot" style="background:#4cf490"></div>SpacetimeDB</div>
+  <div class="legend-item"><div class="legend-dot" style="background:#336791"></div>PostgreSQL</div>
+</div>
+
+<div class="grid">
+
+  <!-- Upgrade Cost per Level -->
+  <div class="card">
+    <h2>Upgrade Cost per Level ($)</h2>
+    <div class="chart-wrap"><canvas id="upgradeCost"></canvas></div>
+  </div>
+
+  <!-- Fix Cost per Level -->
+  <div class="card">
+    <h2>Fix Cost per Level ($)</h2>
+    <div class="chart-wrap"><canvas id="fixCost"></canvas></div>
+    <p class="note">Fix costs attributed to the level where the bug was introduced.</p>
+  </div>
+
+  <!-- Total Cost per Level (stacked) -->
+  <div class="card">
+    <h2>Total Cost per Level — Upgrade + Fix ($)</h2>
+    <div class="chart-wrap"><canvas id="totalCostPerLevel"></canvas></div>
+  </div>
+
+  <!-- Cumulative Cost -->
+  <div class="card">
+    <h2>Cumulative Cost over Levels ($)</h2>
+    <div class="chart-wrap"><canvas id="cumulativeCost"></canvas></div>
+  </div>
+
+  <!-- Bugs per Level -->
+  <div class="card">
+    <h2>Bugs Found per Level</h2>
+    <div class="chart-wrap"><canvas id="bugsPerLevel"></canvas></div>
+  </div>
+
+  <!-- Cumulative Bugs -->
+  <div class="card">
+    <h2>Cumulative Bugs over Levels</h2>
+    <div class="chart-wrap"><canvas id="cumulativeBugs"></canvas></div>
+  </div>
+
+  <!-- LOC over Levels (full width) -->
+  <div class="card full-width">
+    <h2>Lines of Code over Levels (hand-written, excl. generated bindings)</h2>
+    <div class="chart-wrap" style="height:280px"><canvas id="locOverLevels"></canvas></div>
+  </div>
+
+  <!-- Backend LOC only -->
+  <div class="card">
+    <h2>Backend LOC over Levels</h2>
+    <div class="chart-wrap"><canvas id="backendLoc"></canvas></div>
+  </div>
+
+  <!-- Frontend LOC only -->
+  <div class="card">
+    <h2>Frontend LOC over Levels</h2>
+    <div class="chart-wrap"><canvas id="frontendLoc"></canvas></div>
+  </div>
+
+  <!-- Upgrade Duration -->
+  <div class="card">
+    <h2>Upgrade Duration per Level (seconds)</h2>
+    <div class="chart-wrap"><canvas id="duration"></canvas></div>
+  </div>
+
+  <!-- Output Tokens -->
+  <div class="card">
+    <h2>Output Tokens per Upgrade</h2>
+    <div class="chart-wrap"><canvas id="outputTokens"></canvas></div>
+  </div>
+
+</div>
+
+<script>
+// Data is loaded from METRICS_DATA.json inside each run folder at runtime.
+// Open this file via a local HTTP server, e.g.:
+//   python -m http.server 8080
+//   npx serve .
+
+const LEVELS   = ['L1','L2','L3','L4','L5','L6','L7','L8','L9','L10','L11']; // chart x-axis labels
+const FEATURES = [ // paired with LEVELS by index; FEATURES[i] is shown next to LEVELS[i] in the tooltip title
+  'Basic Chat + Typing + Read Receipts + Unread',
+  'Scheduled Messages','Ephemeral Messages','Message Reactions',
+  'Message Editing w/ History','Real-Time Permissions','Rich User Presence',
+  'Message Threading','Private Rooms + DMs','Room Activity Indicators','Draft Sync'
+];
+
+const STDB_COLOR = '#4cf490'; // SpacetimeDB series color (same green as the legend dot)
+const PG_COLOR   = '#336791'; // PostgreSQL series color (same blue as the legend dot)
+
+// ── HELPERS ───────────────────────────────────────────────────────────────────
+
+function cumSum(arr) {
+  let running = 0; // carried forward unrounded; only the emitted values are rounded to 4 decimals
+  return arr.map((value) => { running += value; return parseFloat(running.toFixed(4)); });
+}
+
+function add(a, b) { return a.map((left, idx) => Number((left + b[idx]).toFixed(4))); } // element-wise a[i] + b[i], rounded to 4 decimals
+
+function fmtMins(seconds) {
+  return `${Math.round(seconds / 60)}m`; // nearest whole minute with an 'm' suffix, e.g. 90 -> "2m"
+}
+
+function fmtLocK(n) {
+  return n >= 1000 ? `${(n / 1000).toFixed(1).replace(/\.0$/, '')}k` : String(n); // 1500 -> "1.5k", 2000 -> "2k", 999 -> "999"
+}
+
+// ── CHART FACTORIES ───────────────────────────────────────────────────────────
+
+Chart.defaults.color = '#6f7987'; // global Chart.js defaults: text color, border color, font family and size
+Chart.defaults.borderColor = '#202126';
+Chart.defaults.font.family = "-apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif";
+Chart.defaults.font.size = 11;
+
+const baseTooltip = {
+  // Tooltip options reused by the charts; the title reads "<level> — <feature>" for the hovered index.
+  backgroundColor: '#1a1a1e', borderColor: '#202126', borderWidth: 1,
+  titleColor: '#e6e9f0', bodyColor: '#a0aab8', padding: 10,
+  callbacks: {
+    title(items) {
+      const idx = items[0].dataIndex;
+      return `${LEVELS[idx]} — ${FEATURES[idx]}`;
+    },
+  },
+};
+
+function lineDataset(label, data, color, fill = false) {
+  // Line-series config drawn in `color`; when `fill` is set the area uses `color` + '22' (alpha suffix).
+  const areaTint = fill ? color + '22' : 'transparent';
+  return {
+    label, data,
+    borderColor: color,
+    backgroundColor: areaTint,
+    pointBackgroundColor: color,
+    pointRadius: 4, pointHoverRadius: 6,
+    borderWidth: 2, tension: 0.3,
+    fill,
+  };
+}
+
+function barDataset(label, data, color) {
+  // Bar-series config. A plain '#rrggbb' color gets a 'cc' alpha suffix for the bar
+  // fill. Any other color (e.g. '#4cf49066', which render() passes for the "Fix"
+  // stacks and which already carries alpha) is used unchanged, because appending
+  // 'cc' to it would produce an invalid 10-digit hex string.
+  const isOpaqueHex = /^#[0-9a-f]{6}$/i.test(color);
+  const backgroundColor = isOpaqueHex ? color + 'cc' : color;
+  return { label, data, backgroundColor, borderColor: color, borderWidth: 1, borderRadius: 4 };
+}
+
+const baseScales = { // axis options spread into a chart's `scales` (see makeLineChart)
+  x: { grid: { color: '#202126' } },
+  y: { grid: { color: '#202126' }, beginAtZero: true }
+};
+
+// makeLineChart / makeBarChart are defined in the BOOT section below
+// (they track instances so data can be reloaded without page refresh)
+
+// ── RENDER ────────────────────────────────────────────────────────────────────
+
+function render(d) {
+  // Fills the summary stat tiles and (re)draws every chart from one metrics object `d`.
+  // Reads d.per_level.*, d.bugs_by_level, d.cost, d.quality, d.loc and d.time_seconds.
+  // Nothing is validated: a missing field makes this throw partway through.
+  const perLevel = d.per_level;
+  const bugsStdb = d.bugs_by_level.map(row => row.spacetime);
+  const bugsPg   = d.bugs_by_level.map(row => row.postgres);
+
+  // Per-level series, one array per backend.
+  const { spacetime: upgradeCostStdb, postgres: upgradeCostPg } = perLevel.upgrade_cost;
+  const { spacetime: fixCostStdb,     postgres: fixCostPg }     = perLevel.fix_cost;
+
+  const { spacetime: stdbBackendLoc,  postgres: pgBackendLoc }  = perLevel.loc_backend;
+  const { spacetime: stdbFrontendLoc, postgres: pgFrontendLoc } = perLevel.loc_frontend;
+  const stdbBindingsLoc = perLevel.loc_bindings.spacetime; // only the SpacetimeDB bindings series is read
+
+  const { spacetime: durationStdb, postgres: durationPg } = perLevel.duration_seconds;
+  const { spacetime: tokensStdb,   postgres: tokensPg }   = perLevel.output_tokens;
+
+  // Derived series: upgrade + fix cost, and backend + frontend LOC, per level.
+  const totalStdb    = add(upgradeCostStdb, fixCostStdb);
+  const totalPg      = add(upgradeCostPg,   fixCostPg);
+  const stdbTotalLoc = stdbBackendLoc.map((loc, i) => loc + stdbFrontendLoc[i]);
+  const pgTotalLoc   = pgBackendLoc.map((loc, i) => loc + pgFrontendLoc[i]);
+
+  // ── STAT BARS ───────────────────────────────────────────────────────────────
+  const setStat = (id, value) => { document.getElementById(id).textContent = value; };
+  const usd     = amount => '$' + amount.toFixed(2);
+  const pct     = rate => rate + '%';
+
+  setStat('stat-stdb-cost',     usd(d.cost.spacetime.total));
+  setStat('stat-pg-cost',       usd(d.cost.postgres.total));
+  setStat('stat-stdb-bugs',     d.quality.spacetime.total_bugs);
+  setStat('stat-pg-bugs',       d.quality.postgres.total_bugs);
+  setStat('stat-stdb-sessions', d.quality.spacetime.fix_sessions);
+  setStat('stat-pg-sessions',   d.quality.postgres.fix_sessions);
+  setStat('stat-stdb-loc',      d.loc.spacetime.total_handwritten.toLocaleString());
+  setStat('stat-pg-loc',        d.loc.postgres.total_handwritten.toLocaleString());
+  setStat('stat-stdb-time',     fmtMins(d.time_seconds.spacetime.total));
+  setStat('stat-pg-time',       fmtMins(d.time_seconds.postgres.total));
+  setStat('stat-stdb-success',  pct(d.quality.spacetime.first_attempt_fix_rate_pct));
+  setStat('stat-pg-success',    pct(d.quality.postgres.first_attempt_fix_rate_pct));
+
+  // ── CHARTS ──────────────────────────────────────────────────────────────────
+  // Helpers for the common case of one SpacetimeDB series next to one PostgreSQL series.
+  const linePair = (stdbData, pgData, fill = false) => [
+    lineDataset('SpacetimeDB', stdbData, STDB_COLOR, fill),
+    lineDataset('PostgreSQL',  pgData,   PG_COLOR,   fill),
+  ];
+  const barPair = (stdbData, pgData) => [
+    barDataset('SpacetimeDB', stdbData, STDB_COLOR),
+    barDataset('PostgreSQL',  pgData,   PG_COLOR),
+  ];
+
+  makeLineChart('upgradeCost', linePair(upgradeCostStdb, upgradeCostPg), '$');
+
+  makeBarChart('fixCost', barPair(fixCostStdb, fixCostPg), false, '$');
+
+  // Two stacks per level (one per backend), each split into upgrade cost and fix cost.
+  makeBarChart('totalCostPerLevel', [
+    { ...barDataset('STDB Upgrade', upgradeCostStdb, STDB_COLOR), stack: 'stdb' },
+    { ...barDataset('STDB Fix',     fixCostStdb,     '#4cf49066'), stack: 'stdb', borderColor: '#4cf490' },
+    { ...barDataset('PG Upgrade',   upgradeCostPg,   PG_COLOR),   stack: 'pg' },
+    { ...barDataset('PG Fix',       fixCostPg,       '#33679166'), stack: 'pg',   borderColor: '#336791' },
+  ], true, '$');
+
+  // Running total of upgrade + fix cost across levels.
+  makeLineChart('cumulativeCost', linePair(cumSum(totalStdb), cumSum(totalPg), true), '$');
+
+  makeBarChart('bugsPerLevel', barPair(bugsStdb, bugsPg), false, 'bugs');
+
+  makeLineChart('cumulativeBugs', linePair(cumSum(bugsStdb), cumSum(bugsPg)), 'bugs');
+
+  // Hand-written totals for both backends, plus a semi-transparent SpacetimeDB line that adds bindings LOC.
+  makeLineChart('locOverLevels', [
+    lineDataset('STDB Total (hand-written)', stdbTotalLoc, STDB_COLOR),
+    lineDataset('PG Total',                 pgTotalLoc,   PG_COLOR),
+    lineDataset(
+      'STDB incl. bindings',
+      stdbBackendLoc.map((loc, i) => loc + stdbFrontendLoc[i] + stdbBindingsLoc[i]),
+      '#4cf49055'
+    ),
+  ], 'LOC');
+
+  makeLineChart('backendLoc', [
+    lineDataset('SpacetimeDB backend', stdbBackendLoc, STDB_COLOR),
+    lineDataset('PostgreSQL server',   pgBackendLoc,   PG_COLOR),
+  ], 'LOC');
+
+  makeLineChart('frontendLoc', [
+    lineDataset('SpacetimeDB client', stdbFrontendLoc, STDB_COLOR),
+    lineDataset('PostgreSQL client',  pgFrontendLoc,   PG_COLOR),
+  ], 'LOC');
+
+  // Per-level duration and output-token bars.
+  makeBarChart('duration', barPair(durationStdb, durationPg), false, 'seconds');
+
+  makeBarChart('outputTokens', barPair(tokensStdb, tokensPg), false, 'tokens');
+}
+
+// ── BOOT ─────────────────────────────────────────────────────────────────────
+
+const chartInstances = {};
+const _OrigChart = Chart;
+function makeLineChart(id, datasets, yLabel = '') {
+  if (chartInstances[id]) chartInstances[id].destroy();
+  chartInstances[id] = new _OrigChart(document.getElementById(id), {
+    type: 'line',
+    data: { labels: LEVELS, datasets },
+    options: {
+      responsive: true, maintainAspectRatio: false,
+      interaction: { mode: 'index', intersect: false },
+      plugins: { legend: { display: false }, tooltip: baseTooltip },
+      scales: { ...baseScales, y: { ...baseScales.y, title: { display: !!yLabel, text: yLabel, color: '#6f7987' } } }
+    }
+  });
+}
+
+function makeBarChart(id, datasets, stacked = false, yLabel = '') {
+  if (chartInstances[id]) chartInstances[id].destroy();
+  chartInstances[id] = new _OrigChart(document.getElementById(id), {
+    type: 'bar',
+    data: { labels: LEVELS, datasets },
+    options: {
+      responsive: true, maintainAspectRatio: false,
+      interaction: { mode: 'index', intersect: false },
+      plugins: { legend: { display: false }, tooltip: baseTooltip },
+      scales: {
+        x: { ...baseScales.x, stacked },
+        y: { ...baseScales.y, stacked, title: { display: !!yLabel, text: yLabel, color: '#6f7987' } }
+      }
+    }
+  });
+}
+
+function showError(msg) {
+  const el = document.getElementById('error-banner');
+  el.style.display = 'block';
+  el.textContent = msg;
+}
+
+function clearError() {
+  const el = document.getElementById('error-banner');
+  el.style.display = 'none';
+  el.textContent = '';
+}
+
+function loadFromText(text, filename) {
+  clearError();
+  try {
+    render(JSON.parse(text));
+    document.getElementById('loaded-file').textContent = filename || '';
+    document.getElementById('empty-state').style.display = 'none';
+  } catch (e) {
+    showError(`Parse error in ${filename}: ${e.message}`);
+  }
+}
+
+// ── FILE PICKER (single file) ─────────────────────────────────────────────────
+document.getElementById('file-picker').addEventListener('change', function () {
+  const file = this.files[0];
+  if (!file) return;
+  const reader = new FileReader();
+  reader.onload = e => loadFromText(e.target.result, file.name);
+  reader.readAsText(file);
+  this.value = '';
+});
+
+// ── DIRECTORY BROWSER ─────────────────────────────────────────────────────────
+// Uses showDirectoryPicker (Chrome/Edge) so we can skip node_modules during
+// traversal rather than forcing the browser to enumerate every file first.
+
+const SKIP_DIRS = new Set(['node_modules', '.git', '.svn', 'dist', '.cache', '__pycache__']);
+
+async function scanDir(dirHandle, pathParts, depth, results) {
+  if (depth === 0) return;
+  try {
+    for await (const [name, handle] of dirHandle.entries()) {
+      if (handle.kind === 'file' && /^METRICS_DATA.*\.json$/i.test(name)) {
+        results.push({ name, pathParts: [...pathParts], handle });
+      } else if (handle.kind === 'directory' && !SKIP_DIRS.has(name) && !name.startsWith('.')) {
+        await scanDir(handle, [...pathParts, name], depth - 1, results);
+      }
+    }
+  } catch (_) { /* permission denied — skip */ }
+}
+
+function renderRunList(runs) { // fills #run-list with one clickable <li> per discovered metrics file
+  const panel = document.getElementById('run-panel');
+  const list  = document.getElementById('run-list');
+  const title = document.getElementById('run-panel-title');
+  document.getElementById('run-scanning').style.display = 'none';
+
+  title.textContent = `${runs.length} run${runs.length !== 1 ? 's' : ''} found`;
+  list.innerHTML = '';
+
+  if (runs.length === 0) {
+    list.innerHTML = '<li style="padding:10px 14px;color:#6f7987;font-size:12px;">No METRICS_DATA*.json files found here.</li>';
+    panel.style.display = 'block';
+    return;
+  }
+
+  runs.forEach((run) => {
+    const li = document.createElement('li');
+    const dir = run.pathParts.join(' / ') || '(root)';
+    const nameSpan = document.createElement('span');
+    nameSpan.className = 'run-name';
+    nameSpan.textContent = dir;
+    const pathSpan = document.createElement('span');
+    pathSpan.className = 'run-path';
+    pathSpan.textContent = run.name;
+    li.appendChild(nameSpan);
+    li.appendChild(pathSpan);
+    li.title = [...run.pathParts, run.name].join('/');
+    li.addEventListener('click', async () => {
+      list.querySelectorAll('li').forEach(el => el.classList.remove('active'));
+      li.classList.add('active');
+      clearError();
+      try {
+        const file = await run.handle.getFile();
+        loadFromText(await file.text(), li.title);
+      } catch (e) {
+        showError(`Could not read file: ${e.message}`); // plain text: showError() assigns textContent, so markup would show literally
+      }
+    });
+    list.appendChild(li);
+  });
+
+  panel.style.display = 'block';
+  list.firstElementChild.click(); // auto-load the first run (runs is non-empty here)
+}
+
+document.getElementById('btn-dir').addEventListener('click', async () => {
+  if (!window.showDirectoryPicker) {
+    showError( // plain text only: showError() assigns textContent, so HTML tags would be displayed literally
+      `Folder browsing requires Chrome or Edge. ` +
+      `Use "📄 Open file…" to load a METRICS_DATA*.json directly — ` +
+      `it's in the llm-sequential-upgrade/ root or inside a run directory.`
+    );
+    return;
+  }
+  let dirHandle;
+  try { dirHandle = await window.showDirectoryPicker({ mode: 'read' }); }
+  catch (e) { if (e.name !== 'AbortError') showError('Could not open folder: ' + e.message); return; }
+
+  clearError();
+  const panel    = document.getElementById('run-panel');
+  const scanning = document.getElementById('run-scanning');
+  document.getElementById('run-list').innerHTML = '';
+  document.getElementById('run-panel-title').textContent = 'Scanning…';
+  scanning.style.display = 'block';
+  panel.style.display = 'block';
+
+  const runs = [];
+  await scanDir(dirHandle, [dirHandle.name], 6, runs); // recurse at most 6 directory levels below the picked folder
+  runs.sort((a, b) => a.pathParts.join('/').localeCompare(b.pathParts.join('/')));
+  renderRunList(runs);
+});
+
+document.getElementById('run-panel-close').addEventListener('click', () => {
+  document.getElementById('run-panel').style.display = 'none';
+});
+
+// ── INITIAL STATE ─────────────────────────────────────────────────────────────
+// Show a welcome placeholder; user loads data via the toolbar buttons.
+document.getElementById('empty-state').style.display = 'flex';
+
+</script>
+</body>
+</html>
diff --git a/tools/llm-sequential-upgrade/benchmark.sh b/tools/llm-sequential-upgrade/benchmark.sh
new file mode 100644
index 00000000000..9ba4f0f4b89
--- /dev/null
+++ b/tools/llm-sequential-upgrade/benchmark.sh
@@ -0,0 +1,203 @@
+#!/bin/bash
+# Sequential Upgrade — Parallel Benchmark Launcher
+#
+# Runs multiple test instances in parallel for statistical significance.
+# Each instance gets isolated ports via --run-index.
+#
+# Usage:
+#   ./benchmark.sh                                    # 3 sequential-upgrade runs, both backends
+#   ./benchmark.sh --runs 5                           # 5 runs
+#   ./benchmark.sh --variant one-shot --runs 3        # 3 one-shot runs
+#   ./benchmark.sh --backend spacetime --runs 3       # single backend only
+#   ./benchmark.sh --rules standard --runs 3          # SDK-only rules
+#   ./benchmark.sh --level 15                         # up to level 15 (22 features)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# ─── Parse arguments ─────────────────────────────────────────────────────────
+
+NUM_RUNS=3
+VARIANT="sequential-upgrade"
+RULES="guided"
+TEST_MODE=""
+LEVEL=""
+BACKENDS=("spacetime" "postgres")
+
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --runs) NUM_RUNS="$2"; shift 2 ;;
+    --variant) VARIANT="$2"; shift 2 ;;
+    --rules) RULES="$2"; shift 2 ;;
+    --test) TEST_MODE="$2"; shift 2 ;;
+    --level) LEVEL="$2"; shift 2 ;;
+    --backend) BACKENDS=("$2"); shift 2 ;;
+    *) echo "Unknown option: $1"; exit 1 ;;
+  esac
+done
+
+TEST_FLAG=""
+if [[ -n "$TEST_MODE" ]]; then
+  TEST_FLAG="--test $TEST_MODE"
+fi
+
+# ─── Compute total parallel instances ────────────────────────────────────────
+
+NUM_BACKENDS=${#BACKENDS[@]}
+TOTAL_INSTANCES=$((NUM_RUNS * NUM_BACKENDS))
+
+echo "═══════════════════════════════════════════════════"
+echo "  Sequential Upgrade Benchmark"
+echo "═══════════════════════════════════════════════════"
+echo "  Variant:   $VARIANT"
+echo "  Rules:     $RULES"
+echo "  Level:     ${LEVEL:-auto}"
+echo "  Backends:  ${BACKENDS[*]}"
+echo "  Runs:      $NUM_RUNS per backend"
+echo "  Total:     $TOTAL_INSTANCES parallel instances"
+echo ""
+echo "  Port allocation:"
+for i in $(seq 0 $((TOTAL_INSTANCES - 1))); do
+  OFFSET=$((i * 100))
+  echo "    Run $i: Vite(stdb)=$((5173 + OFFSET)) Vite(pg)=$((5174 + OFFSET)) Express=$((3001 + OFFSET)) PG=$((5433 + OFFSET))"
+done
+echo "═══════════════════════════════════════════════════"
+echo ""
+
+# ─── Validate prerequisites ─────────────────────────────────────────────────
+
+# Add Claude Code desktop install to PATH
+_APPDATA_UNIX="${APPDATA:-$HOME/AppData/Roaming}"
+if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+  _APPDATA_UNIX=$(cygpath "$_APPDATA_UNIX" 2>/dev/null || echo "$_APPDATA_UNIX")
+fi
+CLAUDE_DESKTOP_DIR="$_APPDATA_UNIX/Claude/claude-code"
+if [[ -d "$CLAUDE_DESKTOP_DIR" ]]; then
+  CLAUDE_LATEST=$(ls -d "$CLAUDE_DESKTOP_DIR"/*/ 2>/dev/null | sort -V | tail -1)
+  if [[ -n "$CLAUDE_LATEST" ]]; then
+    export PATH="$PATH:$CLAUDE_LATEST"
+  fi
+fi
+
+# Check Claude CLI
+if ! command -v claude &>/dev/null && ! command -v claude.exe &>/dev/null; then
+  echo "ERROR: Claude Code CLI not found."
+  exit 1
+fi
+
+echo "Starting $TOTAL_INSTANCES parallel instances..."
+echo ""
+
+# ─── Status tracking ────────────────────────────────────────────────────────
+
+STATUS_FILE="$SCRIPT_DIR/benchmark-status.json"
+echo '{}' > "$STATUS_FILE"
+
+# Record one run's state in $STATUS_FILE. Values reach node via argv (never spliced into
+# the JS source), so quotes in $detail cannot break the script. Best-effort: errors are ignored.
+update_status() {
+  local idx="$1" backend="$2" status="$3" detail="${4:-}"
+  node -e '
+    const fs = require("fs");
+    const [f, idx, backend, status, detail] = process.argv.slice(1);
+    const s = JSON.parse(fs.readFileSync(f, "utf-8"));
+    s[`run-${idx}-${backend}`] = {
+      runIndex: Number(idx),
+      backend, status, detail,
+      updatedAt: new Date().toISOString()
+    };
+    fs.writeFileSync(f, JSON.stringify(s, null, 2));
+  ' -- "$STATUS_FILE" "$idx" "$backend" "$status" "$detail" 2>/dev/null || true
+}
+
+# ─── Launch all runs ────────────────────────────────────────────────────────
+# Each run gets its own run-loop.sh which handles:
+#   - Code generation (parallel, headless)
+#   - Chrome MCP grading (serialized via lock file)
+#   - Bug fix iterations (headless)
+#   - Sequential upgrades with regression testing (if applicable)
+
+PIDS=()
+RUN_INDEX=0
+
+for run_num in $(seq 1 "$NUM_RUNS"); do
+  for backend in "${BACKENDS[@]}"; do
+    LOG_FILE="$SCRIPT_DIR/benchmark-run${RUN_INDEX}-${backend}.log"
+    echo "[Run $RUN_INDEX] $backend (run $run_num/$NUM_RUNS) → $LOG_FILE"
+    update_status "$RUN_INDEX" "$backend" "starting" "level=${LEVEL:-auto}"
+    # Capture run-loop.sh's exit code explicitly: under `set -e` a failure would otherwise skip the final status update.
+    (
+      update_status "$RUN_INDEX" "$backend" "running" "$VARIANT"
+      "$SCRIPT_DIR/run-loop.sh" \
+        --backend "$backend" \
+        --variant "$VARIANT" \
+        --level "${LEVEL:-7}" \
+        --rules "$RULES" \
+        $TEST_FLAG \
+        --run-index "$RUN_INDEX" && rc=0 || rc=$?
+      if [[ $rc -eq 0 ]]; then final="completed"; else final="failed"; fi
+      update_status "$RUN_INDEX" "$backend" "$final" "exit=$rc"
+      exit "$rc"  # propagate the real result to the parent's `wait`
+    ) > "$LOG_FILE" 2>&1 &
+    PIDS+=($!)
+    RUN_INDEX=$((RUN_INDEX + 1))
+  done
+done
+
+echo ""
+echo "All $TOTAL_INSTANCES instances launched. PIDs: ${PIDS[*]}"
+echo ""
+echo "Monitor progress:"
+echo "  cat benchmark-status.json                # run status summary"
+echo "  tail -f benchmark-run*-*.log              # live output"
+echo ""
+echo "Waiting for all runs to complete..."
+
+# ─── Wait for all runs ──────────────────────────────────────────────────────
+
+FAILURES=0
+for i in "${!PIDS[@]}"; do
+  if wait "${PIDS[$i]}"; then
+    echo "[Run $i] completed successfully"
+  else
+    echo "[Run $i] FAILED (exit code $?)"
+    FAILURES=$((FAILURES + 1))
+  fi
+done
+
+echo ""
+echo "═══════════════════════════════════════════════════"
+echo "  Benchmark Complete"
+echo "  Successful: $((TOTAL_INSTANCES - FAILURES))/$TOTAL_INSTANCES"
+if [[ $FAILURES -gt 0 ]]; then
+  echo "  Failed: $FAILURES"
+fi
+echo "═══════════════════════════════════════════════════"
+# ─── Auto-generate reports ──────────────────────────────────────────────────
+
+echo ""
+echo "Generating reports for each run..."
+for run_dir in "$SCRIPT_DIR/$VARIANT"/*/; do
+  if [[ -d "$run_dir/telemetry" ]]; then
+    RUN_DIR_NATIVE="$run_dir"
+    if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+      RUN_DIR_NATIVE=$(cygpath -w "$run_dir")
+    fi
+    node "$SCRIPT_DIR/generate-report.mjs" "$RUN_DIR_NATIVE" 2>/dev/null && \
+      echo "  Report: $run_dir/BENCHMARK_REPORT.md" || \
+      echo "  WARNING: Report generation failed for $run_dir"
+  fi
+done
+
+echo ""
+echo "═══════════════════════════════════════════════════"
+echo "  All Done"
+echo "═══════════════════════════════════════════════════"
+echo ""
+echo "Results:"
+for run_dir in "$SCRIPT_DIR/$VARIANT"/*/; do
+  if [[ -f "$run_dir/BENCHMARK_REPORT.md" ]]; then
+    echo "  $run_dir/BENCHMARK_REPORT.md"
+  fi
+done
diff --git a/tools/llm-sequential-upgrade/cleanup.sh b/tools/llm-sequential-upgrade/cleanup.sh
new file mode 100644
index 00000000000..95c62f9b993
--- /dev/null
+++ b/tools/llm-sequential-upgrade/cleanup.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Clean up generated app directories after testing is complete.
+#
+# Removes isolation git repos, build artifacts, and temp files.
+# Run this after you're done grading and have recorded results.
+#
+# Usage:
+#   ./cleanup.sh <app-dir>            # clean one app
+#   ./cleanup.sh --all                # clean all apps in all variants
+#   ./cleanup.sh --variant one-shot   # clean all apps in a variant
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+cleanup_app() {  # $1 = generated app directory; silently returns if it does not exist
+  local app_dir="$1"
+  if [[ ! -d "$app_dir" ]]; then return; fi
+
+  echo "Cleaning: $app_dir"
+
+  # Remove isolation git repo (only report it when there was one; rm -rf succeeds on a missing path)
+  if [[ -e "$app_dir/.git" ]]; then rm -rf "$app_dir/.git" 2>/dev/null && echo "  removed .git"; fi
+
+  # Remove node_modules (can be reinstalled)
+  for nm in "$app_dir"/*/node_modules "$app_dir"/node_modules; do
+    if [[ -d "$nm" ]]; then
+      rm -rf "$nm" 2>/dev/null && echo "  removed $(basename "$(dirname "$nm")")/node_modules"
+    fi
+  done
+
+  # Remove build artifacts
+  rm -rf "$app_dir"/*/dist "$app_dir"/*/.vite 2>/dev/null
+
+  # Remove dev server logs
+  rm -f "$app_dir"/*.log "$app_dir"/*/*.log 2>/dev/null
+
+  echo "  done"
+}
+
+if [[ "${1:-}" == "--all" ]]; then
+  for app_dir in "$SCRIPT_DIR"/*/*/results/*/chat-app-*; do
+    if [[ -d "$app_dir" ]]; then cleanup_app "$app_dir"; fi  # `if` keeps the exit status 0 when the glob matches nothing
+  done
+elif [[ "${1:-}" == "--variant" ]]; then
+  VARIANT="${2:?Usage: ./cleanup.sh --variant <variant-name>}"
+  for app_dir in "$SCRIPT_DIR/$VARIANT"/*/results/*/chat-app-*; do
+    if [[ -d "$app_dir" ]]; then cleanup_app "$app_dir"; fi  # `if` keeps the exit status 0 when the glob matches nothing
+  done
+else
+  APP_DIR="${1:?Usage: ./cleanup.sh <app-dir> | --all | --variant <name>}"
+  cleanup_app "$APP_DIR"
+fi
diff --git a/tools/llm-sequential-upgrade/docker-compose.otel.yaml b/tools/llm-sequential-upgrade/docker-compose.otel.yaml
new file mode 100644
index 00000000000..c5b529925bc
--- /dev/null
+++ b/tools/llm-sequential-upgrade/docker-compose.otel.yaml
@@ -0,0 +1,32 @@
+# Infrastructure for the sequential upgrade benchmark.
+# Run: docker compose -f docker-compose.otel.yaml up -d
+
+services:
+  otel-collector:
+    image: otel/opentelemetry-collector-contrib:latest
+    ports:
+      - "4317:4317"   # gRPC receiver
+      - "4318:4318"   # HTTP receiver
+    volumes:
+      - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml
+      - ./telemetry:/telemetry
+    command: ["--config", "/etc/otelcol-contrib/config.yaml"]
+
+  postgres:
+    image: postgres:16
+    ports:
+      - "6432:5432"
+    environment:
+      POSTGRES_USER: spacetime
+      POSTGRES_PASSWORD: spacetime
+      POSTGRES_DB: spacetime
+    volumes:
+      - llm-sequential-upgrade-pgdata:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U spacetime"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+
+volumes:
+  llm-sequential-upgrade-pgdata:
diff --git a/tools/llm-sequential-upgrade/generate-report.mjs b/tools/llm-sequential-upgrade/generate-report.mjs
new file mode 100644
index 00000000000..f0e78f56819
--- /dev/null
+++ b/tools/llm-sequential-upgrade/generate-report.mjs
@@ -0,0 +1,266 @@
+#!/usr/bin/env node
+
+/**
+ * Generates a BENCHMARK_REPORT.md by aggregating cost-summary.json files
+ * from a completed benchmark run.
+ *
+ * Usage:
+ *   node generate-report.mjs <run-base-dir>
+ *   node generate-report.mjs sequential-upgrade/sequential-upgrade-20260402
+ *
+ * Reads:  telemetry subdirs for cost-summary.json
+ * Reads:  results subdirs for GRADING_RESULTS.md (feature scores)
+ * Writes: BENCHMARK_REPORT.md in the run base directory
+ */
+
+import fs from 'fs';
+import path from 'path';
+
+const runBaseDir = process.argv[2];
+if (!runBaseDir) {
+  console.error('Usage: node generate-report.mjs <run-base-dir>');
+  process.exit(1);
+}
+
+// Find all cost-summary.json files
+const telemetryDir = path.join(runBaseDir, 'telemetry');
+if (!fs.existsSync(telemetryDir)) {
+  console.error(`Telemetry directory not found: ${telemetryDir}`);
+  process.exit(1);
+}
+
+const summaries = [];
+for (const entry of fs.readdirSync(telemetryDir)) {
+  const summaryPath = path.join(telemetryDir, entry, 'cost-summary.json');
+  if (fs.existsSync(summaryPath)) {
+    const data = JSON.parse(fs.readFileSync(summaryPath, 'utf-8'));
+    data._dir = entry;
+    summaries.push(data);
+  }
+}
+
+if (summaries.length === 0) {
+  console.error('No cost-summary.json files found in telemetry subdirectories.');
+  console.error('Run parse-telemetry.mjs with --extract-raw first.');
+  process.exit(1);
+}
+
+// Group by backend
+const byBackend = {};
+for (const s of summaries) {
+  const backend = s.backend || 'unknown';
+  if (!byBackend[backend]) byBackend[backend] = [];
+  byBackend[backend].push(s);
+}
+
+// Sort each backend's summaries by level
+for (const backend of Object.keys(byBackend)) {
+  byBackend[backend].sort((a, b) => (a.level || 0) - (b.level || 0));
+}
+
+// Calculate totals per backend (sum of final run per level)
+function calcTotals(runs) {
+  // Group by level, take the last run per level (final successful)
+  const byLevel = {};
+  for (const r of runs) {
+    const level = r.level || 0;
+    byLevel[level] = r; // last one wins
+  }
+  const levels = Object.values(byLevel);
+  return {
+    totalCost: levels.reduce((s, r) => s + (r.totalCostUsd || 0), 0),
+    totalCalls: levels.reduce((s, r) => s + (r.apiCalls || 0), 0),
+    totalTokens: levels.reduce((s, r) => s + (r.totalTokens || 0), 0),
+    totalDuration: levels.reduce((s, r) => s + (r.totalDurationSec || 0), 0),
+    levelCount: levels.length,
+    levels,
+  };
+}
+
+// Read GRADING_RESULTS.md for feature scores
+function readGradingScores(backend) {
+  const resultsDir = path.join(runBaseDir, 'results', backend);
+  if (!fs.existsSync(resultsDir)) return null;
+
+  const appDirs = fs.readdirSync(resultsDir)
+    .filter(d => d.startsWith('chat-app-'))
+    .map(d => path.join(resultsDir, d))
+    .filter(d => fs.statSync(d).isDirectory());
+
+  if (appDirs.length === 0) return null;
+
+  // Take the most recent app dir
+  const appDir = appDirs.sort().pop();
+  const gradingPath = path.join(appDir, 'GRADING_RESULTS.md');
+  if (!fs.existsSync(gradingPath)) return null;
+
+  const content = fs.readFileSync(gradingPath, 'utf-8');
+
+  // Extract total score from "**TOTAL** | **N** | **M**"
+  const totalMatch = content.match(/\*\*TOTAL\*\*.*?\*\*(\d+)\*\*.*?\*\*(\d+)\*\*/);
+  if (totalMatch) {
+    return { max: parseInt(totalMatch[1]), score: parseInt(totalMatch[2]) };
+  }
+
+  // Fallback: look for "Total Feature Score" in metrics
+  const scoreMatch = content.match(/Total Feature Score.*?(\d+)\s*\/\s*(\d+)/);
+  if (scoreMatch) {
+    return { score: parseInt(scoreMatch[1]), max: parseInt(scoreMatch[2]) };
+  }
+
+  return null;
+}
+
+// Count lines of code in app dir
+function countLoc(backend) {
+  const resultsDir = path.join(runBaseDir, 'results', backend);
+  if (!fs.existsSync(resultsDir)) return null;
+
+  const appDirs = fs.readdirSync(resultsDir)
+    .filter(d => d.startsWith('chat-app-'))
+    .map(d => path.join(resultsDir, d))
+    .filter(d => fs.statSync(d).isDirectory());
+
+  if (appDirs.length === 0) return null;
+  const appDir = appDirs.sort().pop();
+
+  let backendLoc = 0;
+  let frontendLoc = 0;
+
+  function countLines(dir) {
+    if (!fs.existsSync(dir)) return 0;
+    let total = 0;
+    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+      if (entry.name === 'node_modules' || entry.name === 'dist' || entry.name === '.vite' ||
+          entry.name === 'module_bindings' || entry.name.startsWith('level-')) continue;
+      const fullPath = path.join(dir, entry.name);
+      if (entry.isDirectory()) {
+        total += countLines(fullPath);
+      } else if (/\.(ts|tsx|js|jsx)$/.test(entry.name)) {
+        total += fs.readFileSync(fullPath, 'utf-8').split('\n').length;
+      }
+    }
+    return total;
+  }
+
+  // SpacetimeDB backend
+  const stdbBackend = path.join(appDir, 'backend', 'spacetimedb', 'src');
+  if (fs.existsSync(stdbBackend)) {
+    backendLoc = countLines(stdbBackend);
+  }
+
+  // PostgreSQL backend
+  const pgServer = path.join(appDir, 'server');
+  if (fs.existsSync(pgServer)) {
+    backendLoc = countLines(pgServer);
+  }
+
+  // Frontend
+  const clientSrc = path.join(appDir, 'client', 'src');
+  if (fs.existsSync(clientSrc)) {
+    frontendLoc = countLines(clientSrc);
+  }
+
+  return { backendLoc, frontendLoc, totalLoc: backendLoc + frontendLoc };
+}
+
+// ─── Generate report ────────────────────────────────────────────────────────
+
+const backends = Object.keys(byBackend);
+const date = new Date().toISOString().slice(0, 10);
+const variant = summaries[0]?.variant || 'unknown';
+const rules = summaries[0]?.rules || 'unknown';
+
+let report = `# LLM Cost-to-Done Benchmark Report
+
+**Generated:** ${date}
+**Variant:** ${variant}
+**Rules:** ${rules}
+
+---
+
+## Summary
+
+`;
+
+if (backends.length >= 2) {
+  const totals = {};
+  const scores = {};
+  const locs = {};
+  for (const b of backends) {
+    totals[b] = calcTotals(byBackend[b]);
+    scores[b] = readGradingScores(b);
+    locs[b] = countLoc(b);
+  }
+
+  const [b1, b2] = backends;
+  const [t1, t2] = [totals[b1], totals[b2]];
+  // Express the saving relative to the more expensive backend; `|| 1` avoids 0/0 when both costs are zero.
+  const costDelta = (Math.abs(t2.totalCost - t1.totalCost) / (Math.max(t1.totalCost, t2.totalCost) || 1) * 100).toFixed(0);
+  const cheaper = t1.totalCost < t2.totalCost ? b1 : b2;
+
+  report += `| | ${b1} | ${b2} | Delta |
+|--|-----|-----|-------|
+| **Total LLM cost** | **$${t1.totalCost.toFixed(2)}** | **$${t2.totalCost.toFixed(2)}** | ${cheaper} ${Math.abs(Number(costDelta))}% cheaper |
+| **API calls** | ${t1.totalCalls} | ${t2.totalCalls} | |
+| **Total tokens** | ${t1.totalTokens.toLocaleString()} | ${t2.totalTokens.toLocaleString()} | |
+| **Duration** | ${(t1.totalDuration / 60).toFixed(1)} min | ${(t2.totalDuration / 60).toFixed(1)} min | |
+`;
+
+  if (scores[b1] && scores[b2]) {
+    report += `| **Feature score** | ${scores[b1].score}/${scores[b1].max} | ${scores[b2].score}/${scores[b2].max} | |\n`;
+  }
+
+  if (locs[b1] && locs[b2]) {
+    report += `| **Backend LOC** | ${locs[b1].backendLoc} | ${locs[b2].backendLoc} | |\n`;
+    report += `| **Frontend LOC** | ${locs[b1].frontendLoc} | ${locs[b2].frontendLoc} | |\n`;
+    report += `| **Total LOC** | ${locs[b1].totalLoc} | ${locs[b2].totalLoc} | |\n`;
+  }
+} else {
+  const b = backends[0];
+  const t = calcTotals(byBackend[b]);
+  const s = readGradingScores(b);
+
+  report += `| Metric | Value |
+|--------|-------|
+| **Backend** | ${b} |
+| **Total LLM cost** | $${t.totalCost.toFixed(2)} |
+| **API calls** | ${t.totalCalls} |
+| **Total tokens** | ${t.totalTokens.toLocaleString()} |
+| **Duration** | ${(t.totalDuration / 60).toFixed(1)} min |
+`;
+  if (s) report += `| **Feature score** | ${s.score}/${s.max} |\n`;
+}
+
+// Per-level breakdown
+report += `\n---\n\n## Per-Level Cost Breakdown\n\n`;
+
+for (const backend of backends) {
+  report += `### ${backend}\n\n`;
+  report += `| Level | Cost | API Calls | Duration |\n`;
+  report += `|-------|------|-----------|----------|\n`;
+
+  const runs = byBackend[backend];
+  const byLevel = {};
+  for (const r of runs) {
+    byLevel[r.level || 0] = r;
+  }
+
+  for (const [level, r] of Object.entries(byLevel).sort((a, b) => a[0] - b[0])) {
+    report += `| ${level} | $${(r.totalCostUsd || 0).toFixed(2)} | ${r.apiCalls || 0} | ${((r.totalDurationSec || 0) / 60).toFixed(1)} min |\n`;
+  }
+
+  const t = calcTotals(runs);
+  report += `| **Total** | **$${t.totalCost.toFixed(2)}** | **${t.totalCalls}** | **${(t.totalDuration / 60).toFixed(1)} min** |\n\n`;
+}
+
+report += `---\n\n*Generated by generate-report.mjs*\n`;
+
+const outputPath = path.join(runBaseDir, 'BENCHMARK_REPORT.md');
+fs.writeFileSync(outputPath, report);
+console.log(`Report written to: ${outputPath}`);
+console.log(`Backends: ${backends.join(', ')}`);
+for (const b of backends) {
+  const t = calcTotals(byBackend[b]);
+  console.log(`  ${b}: $${t.totalCost.toFixed(2)} (${t.totalCalls} calls, ${(t.totalDuration / 60).toFixed(1)} min)`);
+}
diff --git a/tools/llm-sequential-upgrade/grade-agents.sh b/tools/llm-sequential-upgrade/grade-agents.sh
new file mode 100644
index 00000000000..d693a0e7165
--- /dev/null
+++ b/tools/llm-sequential-upgrade/grade-agents.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# Sequential Upgrade — Playwright Agents Grading
+#
+# Uses Playwright's AI-powered agents to grade a deployed app.
+# The Generator agent discovers UI elements from the live DOM,
+# writes tests with validated selectors, and runs them.
+# The Healer agent auto-fixes failing selectors.
+#
+# Usage:
+#   ./grade-agents.sh <app-dir>
+#
+# Prerequisites:
+#   cd test-plans/playwright && npm install && npx playwright install chromium
+#   npx playwright init-agents --loop=claude
+
+set -euo pipefail
+
+APP_DIR="${1:?Usage: ./grade-agents.sh <app-dir>}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLAYWRIGHT_DIR="$SCRIPT_DIR/test-plans/playwright"
+# Resolve APP_DIR to an absolute path: the script later cd's into $PLAYWRIGHT_DIR and keeps using $APP_DIR.
+if ! APP_DIR="$(cd "$APP_DIR" 2>/dev/null && pwd)"; then
+  echo "ERROR: App directory not found: $1"
+  exit 1
+fi
+
+# Check Playwright agents are initialized
+if [[ ! -f "$PLAYWRIGHT_DIR/.claude/agents/playwright-test-generator.md" ]]; then
+  echo "ERROR: Playwright agents not initialized."
+  echo "Run: cd test-plans/playwright && npx playwright init-agents --loop=claude"
+  exit 1
+fi
+
+# Auto-detect backend
+if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+  GRADE_BACKEND="spacetime"
+  DEFAULT_PORT=5173
+elif [[ -d "$APP_DIR/server" ]]; then
+  GRADE_BACKEND="postgres"
+  DEFAULT_PORT=5174
+else
+  GRADE_BACKEND="unknown"
+  DEFAULT_PORT=5173
+fi
+
+# Try to read port from metadata (best-effort: any lookup failure falls back to $DEFAULT_PORT instead of tripping `set -e`)
+VITE_PORT="$DEFAULT_PORT"
+RUN_BASE="$(cd "$APP_DIR/../../.." 2>/dev/null && pwd)"
+if [[ -d "$RUN_BASE/telemetry" ]]; then
+  LATEST_META=$(find "$RUN_BASE/telemetry" -name "metadata.json" -path "*$GRADE_BACKEND*" -exec ls -t {} + 2>/dev/null | head -1) || true
+  if [[ -n "$LATEST_META" ]]; then
+    META_PORT=$(node -e "const m=JSON.parse(require('fs').readFileSync(process.argv[1],'utf-8')); process.stdout.write(String(m.vitePort||''))" -- "$(cygpath -w "$LATEST_META" 2>/dev/null || echo "$LATEST_META")" 2>/dev/null) || true
+    if [[ -n "$META_PORT" ]]; then
+      VITE_PORT="$META_PORT"
+    fi
+  fi
+fi
+
+APP_URL="http://localhost:$VITE_PORT"
+
+echo "=== Sequential Upgrade: Playwright Agents Grade ==="
+echo "  App dir:  $APP_DIR"
+echo "  Backend:  $GRADE_BACKEND (port $VITE_PORT)"
+echo "  URL:      $APP_URL"
+echo ""
+
+# Reset backend state for a clean test
+echo "Resetting backend state..."
+"$SCRIPT_DIR/reset-app.sh" "$APP_DIR" || echo "WARNING: Backend reset failed"
+sleep 3
+
+# Update seed test to point at the correct URL
+cat > "$PLAYWRIGHT_DIR/specs/seed.spec.ts" <<EOF
+import { test, expect } from '@playwright/test';
+
+test.describe('Seed', () => {
+  test('seed', async ({ page }) => {
+    await page.goto('$APP_URL');
+    await page.waitForSelector('input, button', { timeout: 30_000 });
+  });
+});
+EOF
+
+# Add Claude Code desktop install to PATH
+_APPDATA_UNIX="${APPDATA:-$HOME/AppData/Roaming}"
+if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+  _APPDATA_UNIX=$(cygpath "$_APPDATA_UNIX" 2>/dev/null || echo "$_APPDATA_UNIX")
+fi
+CLAUDE_DESKTOP_DIR="$_APPDATA_UNIX/Claude/claude-code"
+if [[ -d "$CLAUDE_DESKTOP_DIR" ]]; then
+  CLAUDE_LATEST=$(ls -d "$CLAUDE_DESKTOP_DIR"/*/ 2>/dev/null | sort -V | tail -1)
+  if [[ -n "$CLAUDE_LATEST" ]]; then
+    export PATH="$PATH:$CLAUDE_LATEST"
+  fi
+fi
+
+CLAUDE_CMD=""
+if command -v claude &>/dev/null; then
+  CLAUDE_CMD="claude"
+elif command -v claude.exe &>/dev/null; then
+  CLAUDE_CMD="claude.exe"
+else
+  echo "ERROR: Claude Code CLI not found."
+  exit 1
+fi
+
+echo ""
+echo "=== Phase 1: Generate Tests ==="
+echo "Running Playwright Test Generator agent..."
+echo ""
+
+cd "$PLAYWRIGHT_DIR"
+
+# Invoke the Generator agent via Claude Code to create tests from the plan
+$CLAUDE_CMD --print --dangerously-skip-permissions -p "
+You are running the Playwright Test Generator agent.
+
+Read the test plan at specs/plans/chat-app-features.md.
+For each test scenario in the plan:
+1. Use generator_setup_page to open the app
+2. Execute each step using the Playwright MCP tools (browser_click, browser_type, browser_snapshot, etc.)
+3. Read the generator log with generator_read_log
+4. Write the test with generator_write_test
+
+The app is running at $APP_URL. Generate tests for all scenarios in the plan.
+Important: Use browser_snapshot to inspect the DOM before interacting — do NOT guess selectors.
+" 2>&1 | tee "$APP_DIR/agent-generator-output.log"
+
+echo ""
+echo "=== Phase 2: Run Generated Tests ==="
+
+# Run whatever tests were generated
+APP_URL="$APP_URL" npx playwright test --reporter=json \
+  1>/tmp/pw-agent-results.json 2>/dev/null || true
+
+RESULTS_SIZE=$(wc -c < /tmp/pw-agent-results.json 2>/dev/null || echo "0")
+
+if [[ "$RESULTS_SIZE" -gt 100 ]]; then
+  echo ""
+  echo "=== Phase 3: Parse Results ==="
+
+  PW_RESULTS="/tmp/pw-agent-results.json"
+  APP_DIR_NATIVE="$APP_DIR"
+  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+    PW_RESULTS=$(cygpath -w "$PW_RESULTS")
+    APP_DIR_NATIVE=$(cygpath -w "$APP_DIR")
+  fi
+
+  node "$SCRIPT_DIR/parse-playwright-results.mjs" "$PW_RESULTS" "$APP_DIR_NATIVE" "$GRADE_BACKEND"
+
+  echo ""
+  echo "=== Results ==="
+  echo "  GRADING_RESULTS.md: $APP_DIR"
+  echo "  Generator log: $APP_DIR/agent-generator-output.log"
+else
+  echo "WARNING: No test results produced."
+  echo "Check the generator output: $APP_DIR/agent-generator-output.log"
+fi
+
+cd "$SCRIPT_DIR"
diff --git a/tools/llm-sequential-upgrade/grade-playwright.sh b/tools/llm-sequential-upgrade/grade-playwright.sh
new file mode 100644
index 00000000000..73e5ed5d397
--- /dev/null
+++ b/tools/llm-sequential-upgrade/grade-playwright.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# Sequential Upgrade — Playwright Grading
+#
+# Runs deterministic Playwright tests against a deployed app and generates
+# GRADING_RESULTS.md. This is an alternative to the Chrome MCP grading agent.
+#
+# Usage:
+#   ./grade-playwright.sh <app-dir>
+#   ./grade-playwright.sh sequential-upgrade/sequential-upgrade-20260401/results/spacetime/chat-app-20260401-123403
+#
+# Prerequisites:
+#   cd test-plans/playwright && npm install && npx playwright install chromium
+
+set -euo pipefail
+
+APP_DIR="${1:?Usage: ./grade-playwright.sh <app-dir>}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLAYWRIGHT_DIR="$SCRIPT_DIR/test-plans/playwright"
+
+if [[ ! -d "$APP_DIR" ]]; then
+  echo "ERROR: App directory not found: $APP_DIR"
+  exit 1
+fi
+
+# Check Playwright is installed
+if [[ ! -f "$PLAYWRIGHT_DIR/node_modules/.bin/playwright" ]]; then
+  echo "ERROR: Playwright not installed."
+  echo "Run: cd test-plans/playwright && npm install && npx playwright install chromium"
+  exit 1
+fi
+
+# Auto-detect backend from app directory structure
+if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+  GRADE_BACKEND="spacetime"
+  DEFAULT_PORT=5173
+elif [[ -d "$APP_DIR/server" ]]; then
+  GRADE_BACKEND="postgres"
+  DEFAULT_PORT=5174
+else
+  GRADE_BACKEND="unknown"
+  DEFAULT_PORT=5173
+fi
+
+# Try to read the port from telemetry metadata (set by --run-index).
+# The lookup is best-effort: each step falls back to DEFAULT_PORT instead of letting
+# `set -euo pipefail` abort the script (silently, since stderr is discarded) on a miss.
+VITE_PORT="$DEFAULT_PORT"
+RUN_BASE="$(cd "$APP_DIR/../../.." 2>/dev/null && pwd)" || RUN_BASE=""
+if [[ -n "$RUN_BASE" && -d "$RUN_BASE/telemetry" ]]; then
+  # Find the most recent metadata.json for this backend
+  LATEST_META=$(find "$RUN_BASE/telemetry" -name "metadata.json" -path "*$GRADE_BACKEND*" -exec ls -t {} + 2>/dev/null | head -1) || true
+  if [[ -n "$LATEST_META" ]]; then
+    META_PORT=$(node -e "const m=JSON.parse(require('fs').readFileSync(process.argv[1],'utf-8')); process.stdout.write(String(m.vitePort||''))" -- "$(cygpath -w "$LATEST_META" 2>/dev/null || echo "$LATEST_META")" 2>/dev/null) || META_PORT=""
+    # Accept only a plain port number; anything else keeps the default
+    if [[ "$META_PORT" =~ ^[0-9]+$ ]]; then VITE_PORT="$META_PORT"; fi
+  fi
+fi
+
+APP_URL="http://localhost:$VITE_PORT"
+
+echo "=== Sequential Upgrade: Playwright Grade ==="
+echo "  App dir:  $APP_DIR"
+echo "  Backend:  $GRADE_BACKEND (port $VITE_PORT)"
+echo "  URL:      $APP_URL"
+echo ""
+
+# Reset backend state for a clean test
+echo "Resetting backend state..."
+"$SCRIPT_DIR/reset-app.sh" "$APP_DIR" || echo "WARNING: Backend reset failed"
+sleep 3
+
+# Run Playwright tests (BrowserContext isolation handles multi-user — no second server needed)
+# NOTE: test failures are tolerated (|| true), so stale results must not survive into this run.
+APP_DIR="$(cd "$APP_DIR" && pwd)"  # absolute: a relative <app-dir> breaks after the cd below
+cd "$PLAYWRIGHT_DIR"
+rm -f test-results/results.json    # never grade a previous run's results
+APP_URL="$APP_URL" npx playwright test --reporter=json 2>&1 | tee test-results/raw-output.json || true
+
+# Parse results into GRADING_RESULTS.md
+if [[ -f "test-results/results.json" ]]; then
+  echo ""
+  echo "Parsing Playwright results..."
+  # On Windows, convert paths for Node.js
+  APP_DIR_NATIVE="$APP_DIR"
+  RESULTS_FILE="$PLAYWRIGHT_DIR/test-results/results.json"
+  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+    APP_DIR_NATIVE=$(cygpath -w "$APP_DIR")
+    RESULTS_FILE=$(cygpath -w "$RESULTS_FILE")
+  fi
+  node "$SCRIPT_DIR/parse-playwright-results.mjs" "$RESULTS_FILE" "$APP_DIR_NATIVE" "$GRADE_BACKEND"
+  echo ""
+  echo "=== Results ==="
+  echo "  GRADING_RESULTS.md written to: $APP_DIR"
+else
+  echo "ERROR: No Playwright results found at test-results/results.json"
+  exit 1
+fi
diff --git a/tools/llm-sequential-upgrade/grade.sh b/tools/llm-sequential-upgrade/grade.sh
new file mode 100644
index 00000000000..6f6bd7ff922
--- /dev/null
+++ b/tools/llm-sequential-upgrade/grade.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Sequential Upgrade — Grade & Test Loop
+#
+# Tests a deployed app via Chrome MCP, writes bug reports for the fix agent.
+# This runs INTERACTIVELY in Claude Code (not headless) because it needs Chrome MCP.
+#
+# Usage:
+#   ./grade.sh <app-dir>
+#   ./grade.sh sequential-upgrade/sequential-upgrade-20260401/results/spacetime/chat-app-20260401-123403
+#
+# This script is a convenience wrapper. You can also just open Claude Code
+# in the llm-sequential-upgrade/ directory and say:
+#   "Grade the app at results/spacetime/chat-app-20260331-083613"
+
+set -euo pipefail
+
+APP_DIR="${1:?Usage: ./grade.sh <app-dir>}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [[ ! -d "$APP_DIR" ]]; then
+  echo "ERROR: App directory not found: $APP_DIR"
+  exit 1
+fi
+
+# On Windows, convert to native path
+if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+  APP_DIR_NATIVE=$(cygpath -w "$APP_DIR")
+  SCRIPT_DIR_NATIVE=$(cygpath -w "$SCRIPT_DIR")
+else
+  APP_DIR_NATIVE="$APP_DIR"
+  SCRIPT_DIR_NATIVE="$SCRIPT_DIR"
+fi
+
+# Find Claude CLI
+CLAUDE_CMD=""
+if command -v claude &>/dev/null; then
+  CLAUDE_CMD="claude"
+elif command -v claude.exe &>/dev/null; then
+  CLAUDE_CMD="claude.exe"
+elif command -v npx &>/dev/null; then
+  CLAUDE_CMD="npx @anthropic-ai/claude-code"
+else
+  echo "ERROR: Claude Code CLI not found (tried: claude, claude.exe, npx)."
+  echo "Install it with: npm install -g @anthropic-ai/claude-code"
+  exit 1
+fi
+
+echo "=== Sequential Upgrade: Grade ==="
+echo "  App dir: $APP_DIR_NATIVE"
+echo ""
+echo "This launches an INTERACTIVE Claude Code session with Chrome MCP."
+echo "It will test the deployed app, write bug reports, and grade features."
+echo ""
+
+# Auto-detect backend from app directory structure
+if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+  GRADE_BACKEND="spacetime"
+  VITE_PORT=5173
+elif [[ -d "$APP_DIR/server" ]]; then
+  GRADE_BACKEND="postgres"
+  VITE_PORT=5174
+else
+  GRADE_BACKEND="unknown"
+  VITE_PORT=5173
+fi
+echo "  Backend:  $GRADE_BACKEND (port $VITE_PORT)"
+
+# Interactive mode — no --print, no --dangerously-skip-permissions
+cd "$SCRIPT_DIR"
+$CLAUDE_CMD -p "Grade the sequential upgrade app at: $APP_DIR_NATIVE
+
+Backend: $GRADE_BACKEND
+
+Follow CLAUDE.md Phases 6-8:
+1. Open http://localhost:$VITE_PORT in Chrome and verify the app loads
+2. Test each feature using the test plans in test-plans/feature-*.md
+3. Score each feature 0-3 based on browser observations
+4. If any features score < 3, write a BUG_REPORT.md in the app directory with:
+   - Which features failed and why
+   - Exact error messages or broken behaviors observed
+   - Console errors from read_console_messages
+5. Write GRADING_RESULTS.md with scores
+6. Write/update ITERATION_LOG.md with this test iteration
+
+After grading, if there are bugs, tell the user to run:
+  ./run.sh --fix $APP_DIR_NATIVE"
diff --git a/tools/llm-sequential-upgrade/otel-collector-config.yaml b/tools/llm-sequential-upgrade/otel-collector-config.yaml
new file mode 100644
index 00000000000..0283d029edb
--- /dev/null
+++ b/tools/llm-sequential-upgrade/otel-collector-config.yaml
@@ -0,0 +1,28 @@
+# OpenTelemetry Collector config for capturing Claude Code telemetry to JSON files.
+
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317
+      http:
+        endpoint: 0.0.0.0:4318
+
+exporters:
+  # Write all events to a JSON file (one JSON object per line)
+  file/logs:
+    path: /telemetry/logs.jsonl
+    flush_interval: 1s
+
+  file/metrics:
+    path: /telemetry/metrics.jsonl
+    flush_interval: 5s
+
+service:
+  pipelines:
+    logs:
+      receivers: [otlp]
+      exporters: [file/logs]
+    metrics:
+      receivers: [otlp]
+      exporters: [file/metrics]
diff --git a/tools/llm-sequential-upgrade/parse-playwright-results.mjs b/tools/llm-sequential-upgrade/parse-playwright-results.mjs
new file mode 100644
index 00000000000..53fad567f00
--- /dev/null
+++ b/tools/llm-sequential-upgrade/parse-playwright-results.mjs
@@ -0,0 +1,169 @@
+#!/usr/bin/env node
+
+/**
+ * Converts Playwright JSON reporter output into GRADING_RESULTS.md
+ * matching the format used by the Chrome MCP grading agent.
+ *
+ * Usage:
+ *   node parse-playwright-results.mjs <results.json> <app-dir> <backend>
+ */
+
+import fs from 'fs';
+import path from 'path';
+
+const resultsFile = process.argv[2];
+const appDir = process.argv[3];
+const backend = process.argv[4] || 'unknown';
+
+if (!resultsFile || !appDir) {
+  console.error('Usage: node parse-playwright-results.mjs <results.json> <app-dir> <backend>');
+  process.exit(1);
+}
+
+const results = JSON.parse(fs.readFileSync(resultsFile, 'utf-8'));
+
+// Feature name mapping: spec file name → feature number and name
+const FEATURES = {
+  'feature-01-basic-chat': { num: 1, name: 'Basic Chat' },
+  'feature-02-typing-indicators': { num: 2, name: 'Typing Indicators' },
+  'feature-03-read-receipts': { num: 3, name: 'Read Receipts' },
+  'feature-04-unread-counts': { num: 4, name: 'Unread Counts' },
+  'feature-05-scheduled-messages': { num: 5, name: 'Scheduled Messages' },
+  'feature-06-ephemeral-messages': { num: 6, name: 'Ephemeral Messages' },
+  'feature-07-reactions': { num: 7, name: 'Message Reactions' },
+  'feature-08-edit-history': { num: 8, name: 'Message Editing with History' },
+  'feature-09-permissions': { num: 9, name: 'Real-Time Permissions' },
+  'feature-10-presence': { num: 10, name: 'Rich User Presence' },
+  'feature-11-threading': { num: 11, name: 'Message Threading' },
+  'feature-12-private-rooms': { num: 12, name: 'Private Rooms & DMs' },
+  'feature-13-activity-indicators': { num: 13, name: 'Room Activity Indicators' },
+  'feature-14-draft-sync': { num: 14, name: 'Draft Sync' },
+  'feature-15-anonymous-migration': { num: 15, name: 'Anonymous to Registered Migration' },
+  'feature-16-pinned-messages': { num: 16, name: 'Pinned Messages' },
+  'feature-17-user-profiles': { num: 17, name: 'User Profiles' },
+  'feature-18-mentions-notifications': { num: 18, name: '@Mentions and Notifications' },
+  'feature-19-bookmarked-messages': { num: 19, name: 'Bookmarked/Saved Messages' },
+  'feature-20-message-forwarding': { num: 20, name: 'Message Forwarding' },
+  'feature-21-slow-mode': { num: 21, name: 'Slow Mode' },
+  'feature-22-polls': { num: 22, name: 'Polls' },
+};
+
+// Parse suites → extract test results per feature
+const featureResults = {};
+
+function walkSuites(suites) {
+  for (const suite of suites) {
+    // Match spec file name to feature
+    const specFile = suite.file || '';
+    const featureKey = Object.keys(FEATURES).find((k) => specFile.includes(k));
+
+    if (featureKey && suite.specs) {
+      if (!featureResults[featureKey]) {
+        featureResults[featureKey] = { passed: 0, failed: 0, skipped: 0, tests: [] };
+      }
+      for (const spec of suite.specs) {
+        for (const test of spec.tests || []) {
+          const status = test.status || test.results?.[0]?.status || 'unknown';
+          const testInfo = {
+            title: spec.title,
+            status,
+            duration: test.results?.[0]?.duration || 0,
+          };
+          featureResults[featureKey].tests.push(testInfo);
+          if (status === 'expected' || status === 'passed') {
+            featureResults[featureKey].passed++;
+          } else if (status === 'skipped') {
+            featureResults[featureKey].skipped++;
+          } else {
+            featureResults[featureKey].failed++;
+          }
+        }
+      }
+    }
+
+    if (suite.suites) {
+      walkSuites(suite.suites);
+    }
+  }
+}
+
+walkSuites(results.suites || []);
+
+// Calculate scores: 3 points per feature, proportional to pass rate
+// Skipped tests don't count toward total (they're unimplemented)
+function calcScore(fr) {
+  const total = fr.passed + fr.failed;
+  if (total === 0) return 0; // all skipped = 0
+  const ratio = fr.passed / total;
+  if (ratio >= 1.0) return 3;
+  if (ratio >= 0.66) return 2;
+  if (ratio >= 0.33) return 1;
+  return 0;
+}
+
+// Generate report
+const date = new Date().toISOString().slice(0, 10);
+let totalScore = 0;
+let totalMax = 0;
+const featureLines = [];
+const summaryRows = [];
+
+for (const [key, feat] of Object.entries(FEATURES)) {
+  const fr = featureResults[key];
+  if (!fr) continue; // skip features that weren't tested (not in the spec files run)
+  const score = calcScore(fr);
+  totalScore += score;
+  totalMax += 3;
+
+  const testDetails = fr
+    ? fr.tests
+        .map((t) => {
+          const icon = t.status === 'expected' || t.status === 'passed' ? 'x' : ' ';
+          return `- [${icon}] ${t.title} (${t.status}, ${t.duration}ms)`;
+        })
+        .join('\n')
+    : '- [ ] No tests ran';
+
+  featureLines.push(`## Feature ${feat.num}: ${feat.name} (Score: ${score} / 3)\n\n${testDetails}\n`);
+  const notes = fr
+    ? `${fr.passed}/${fr.passed + fr.failed} passed, ${fr.skipped} skipped`
+    : 'No tests';
+  summaryRows.push(
+    `| ${feat.num}. ${feat.name} | 3 | ${score} | ${notes} |`
+  );
+}
+
+const report = `# Chat App Grading Results
+
+**Model:** Playwright (automated)
+**Date:** ${date}
+**Backend:** ${backend}
+**Grading Method:** Playwright automated tests
+
+---
+
+## Overall Metrics
+
+| Metric                  | Value                          |
+| ----------------------- | ------------------------------ |
+| **Features Evaluated**  | 1-15                           |
+| **Total Feature Score** | ${totalScore} / ${totalMax}    |
+
+---
+
+${featureLines.join('\n---\n\n')}
+
+---
+
+## Summary Score Sheet
+
+| Feature | Max | Score | Notes |
+|---------|-----|-------|-------|
+${summaryRows.join('\n')}
+| **TOTAL** | **${totalMax}** | **${totalScore}** | |
+`;
+
+const outputPath = path.join(appDir, 'GRADING_RESULTS.md');
+fs.writeFileSync(outputPath, report);
+console.log(`GRADING_RESULTS.md written to: ${outputPath}`);
+console.log(`Total score: ${totalScore}/${totalMax}`);
diff --git a/tools/llm-sequential-upgrade/parse-telemetry.mjs b/tools/llm-sequential-upgrade/parse-telemetry.mjs
new file mode 100644
index 00000000000..b24208780bc
--- /dev/null
+++ b/tools/llm-sequential-upgrade/parse-telemetry.mjs
@@ -0,0 +1,311 @@
+#!/usr/bin/env node
+
+/**
+ * Parses OpenTelemetry logs from Claude Code sessions
+ * and generates a COST_REPORT.md with exact token counts.
+ *
+ * Usage:
+ *   node parse-telemetry.mjs <run-dir>
+ *
+ * Reads: telemetry/logs.jsonl (OTLP JSON log records)
+ * Writes: <run-dir>/COST_REPORT.md
+ */
+
+import fs from 'fs';
+import path from 'path';
+
+const runDir = process.argv[2];
+// Parse optional arguments (positional or --key=value)
+let endTimeOverride = null;
+let logsFileOverride = null;
+let extractRaw = false;
+for (let i = 3; i < process.argv.length; i++) {
+  const arg = process.argv[i];
+  if (arg.startsWith('--logs-file=')) {
+    logsFileOverride = arg.split('=').slice(1).join('=');
+  } else if (arg.startsWith('--end-time=')) {
+    endTimeOverride = arg.split('=').slice(1).join('=');
+  } else if (arg === '--extract-raw') {
+    extractRaw = true;
+  } else if (!arg.startsWith('--')) {
+    endTimeOverride = arg; // legacy positional arg
+  }
+}
+if (!runDir) {
+  console.error('Usage: node parse-telemetry.mjs <run-dir> [--logs-file=<path>] [--end-time=<iso>]');
+  console.error('  --logs-file: path to logs.jsonl (default: <run-dir>/../logs.jsonl)');
+  console.error('  --end-time:  upper bound for time filtering (e.g. "2026-03-30T22:00:00Z")');
+  process.exit(1);
+}
+
+// Locate logs.jsonl: explicit path, or derive from run dir parent
+const logsFile = logsFileOverride
+  || path.join(path.dirname(path.resolve(runDir)), 'logs.jsonl');
+
+if (!fs.existsSync(logsFile)) {
+  console.error(`Telemetry file not found: ${logsFile}`);
+  console.error('Make sure the OTel Collector is running and Claude Code has CLAUDE_CODE_ENABLE_TELEMETRY=1');
+  process.exit(1);
+}
+
+// Read metadata
+const metadataFile = path.join(runDir, 'metadata.json');
+const metadata = fs.existsSync(metadataFile)
+  ? JSON.parse(fs.readFileSync(metadataFile, 'utf-8'))
+  : { level: '?', backend: '?', timestamp: '?' };
+
+// Session-ID filtering: prefer session.id match over time-range-only filtering.
+// When both backends run in parallel, time ranges overlap — session ID is the
+// only reliable way to attribute telemetry records to the correct run.
+const sessionId = metadata.sessionId || null;
+const runId = metadata.runId || null;
+
+if (sessionId) {
+  console.log(`Session-ID filtering enabled: session.id=${sessionId}`);
+} else {
+  console.warn('WARNING: No sessionId in metadata — falling back to time-range-only filtering.');
+  console.warn('         Results may include records from other concurrent runs.');
+}
+
+// Time-range filtering: only include records from this run's time window
+const startTime = metadata.startedAtUtc || metadata.startedAt;
+const endTime = endTimeOverride || metadata.endedAtUtc || metadata.endedAt;
+const startMs = startTime ? new Date(startTime).getTime() : 0;
+const endMs = endTime ? new Date(endTime).getTime() : Date.now();
+
+if (!endTime) {
+  console.warn('WARNING: No end time found in metadata — using current time as upper bound.');
+  console.warn('         The run may have crashed or the metadata update failed.');
+}
+console.log(`Filtering telemetry: ${startTime || '(start)'} → ${endTime || '(now)'}`);
+
+// Parse OTLP log records
+// The format depends on the collector version, but generally each line is a JSON object
+// containing log records with attributes that include token counts.
+const lines = fs.readFileSync(logsFile, 'utf-8').trim().split('\n').filter(Boolean);
+
+const apiCalls = [];
+const matchedRawLines = []; // raw lines that passed all filters (for --extract-raw)
+let totalInput = 0;
+let totalOutput = 0;
+let totalCacheRead = 0;
+let totalCacheCreation = 0;
+let totalCostUsd = 0;
+
+let skippedOutOfRange = 0;
+let skippedNonApi = 0;
+let skippedWrongSession = 0;
+
+for (const line of lines) {
+  try {
+    const record = JSON.parse(line);
+
+    // OTLP log records can be nested in different ways depending on the collector.
+    // We look for attributes containing token counts.
+    const attrs = extractAttributes(record);
+
+    // Extract resource-level attributes (contain session.id, run.id from OTEL_RESOURCE_ATTRIBUTES)
+    const resourceAttrs = extractResourceAttributes(record);
+
+    // Filter by session ID (if available in metadata)
+    // This is the primary filter when both backends run in parallel on the same collector.
+    if (sessionId) {
+      const recordSessionId = resourceAttrs['session.id'];
+      const recordRunId = resourceAttrs['run.id'];
+      if (recordSessionId || recordRunId) {
+        // Record has session tags — must match
+        if (recordSessionId !== sessionId && recordRunId !== runId) {
+          skippedWrongSession++;
+          continue;
+        }
+      }
+      // else: record has no session tags (older telemetry) — fall through to time-range filter
+    }
+
+    // Filter by time range — only include records within this run's window
+    const eventTimestamp = attrs['event.timestamp'] || attrs.timestamp;
+    if (eventTimestamp) {
+      const eventMs = new Date(eventTimestamp).getTime();
+      if (eventMs < startMs || eventMs > endMs) {
+        skippedOutOfRange++;
+        continue;
+      }
+    }
+
+    // This record passed session-ID and time-range filters — collect for raw extraction
+    if (extractRaw) {
+      matchedRawLines.push(line);
+    }
+
+    // Filter by event type — only api_request records have token data
+    if (attrs._eventType && attrs._eventType !== 'claude_code.api_request') {
+      skippedNonApi++;
+      continue;
+    }
+
+    if (attrs.input_tokens !== undefined || attrs['input_tokens'] !== undefined) {
+      const call = {
+        inputTokens: Number(attrs.input_tokens || attrs['input_tokens'] || 0),
+        outputTokens: Number(attrs.output_tokens || attrs['output_tokens'] || 0),
+        cacheReadTokens: Number(attrs.cache_read_tokens || attrs['cache_read_tokens'] || 0),
+        cacheCreationTokens: Number(attrs.cache_creation_tokens || attrs['cache_creation_tokens'] || 0),
+        costUsd: Number(attrs.cost_usd || attrs['cost_usd'] || 0),
+        model: attrs.model || attrs['model'] || 'unknown',
+        durationMs: Number(attrs.duration_ms || attrs['duration_ms'] || 0),
+        timestamp: eventTimestamp || record.timeUnixNano || '',
+      };
+
+      apiCalls.push(call);
+      totalInput += call.inputTokens;
+      totalOutput += call.outputTokens;
+      totalCacheRead += call.cacheReadTokens;
+      totalCacheCreation += call.cacheCreationTokens;
+      totalCostUsd += call.costUsd;
+    }
+  } catch {
+    // Skip unparseable lines
+  }
+}
+
+// Generate report
+const totalTokens = totalInput + totalOutput;
+const totalDurationSec = apiCalls.reduce((sum, c) => sum + c.durationMs, 0) / 1000;
+
+const report = `# Cost Report
+
+**App:** chat-app
+**Backend:** ${metadata.backend}
+**Level:** ${metadata.level}
+**Date:** ${new Date().toISOString().slice(0, 10)}
+**Started:** ${metadata.startedAt || metadata.timestamp}
+
+## Summary
+
+| Metric                  | Value |
+|-------------------------|-------|
+| Total input tokens      | ${totalInput.toLocaleString()} |
+| Total output tokens     | ${totalOutput.toLocaleString()} |
+| Total tokens            | ${totalTokens.toLocaleString()} |
+| Cache read tokens       | ${totalCacheRead.toLocaleString()} |
+| Cache creation tokens   | ${totalCacheCreation.toLocaleString()} |
+| Total cost (USD)        | $${totalCostUsd.toFixed(4)} |
+| Total API time          | ${totalDurationSec.toFixed(1)}s |
+| API calls               | ${apiCalls.length} |
+
+## Per-Call Breakdown
+
+| # | Model | Input | Output | Cache Read | Cost | Duration |
+|---|-------|-------|--------|------------|------|----------|
+${apiCalls.map((c, i) =>
+  `| ${i + 1} | ${c.model} | ${c.inputTokens.toLocaleString()} | ${c.outputTokens.toLocaleString()} | ${c.cacheReadTokens.toLocaleString()} | $${c.costUsd.toFixed(4)} | ${(c.durationMs / 1000).toFixed(1)}s |`
+).join('\n')}
+
+## Notes
+
+- Token counts are exact values from Claude Code's OpenTelemetry instrumentation
+- Cache read tokens represent prompt caching (repeated context sent at reduced cost)
+- Total cost includes both input and output token pricing
+`;
+
+const reportPath = path.join(runDir, 'COST_REPORT.md');
+fs.writeFileSync(reportPath, report);
+
+console.log(`Parsed ${apiCalls.length} API calls from ${lines.length} telemetry records.`);
+console.log(`  Skipped: ${skippedOutOfRange} out of time range, ${skippedNonApi} non-API events, ${skippedWrongSession} wrong session`);
+console.log(`Total tokens: ${totalTokens.toLocaleString()} (${totalInput.toLocaleString()} in / ${totalOutput.toLocaleString()} out)`);
+console.log(`Total cost: $${totalCostUsd.toFixed(4)}`);
+console.log(`Report saved to: ${reportPath}`);
+
+// Write raw telemetry extract if requested
+if (extractRaw && matchedRawLines.length > 0) {
+  const rawPath = path.join(runDir, 'raw-telemetry.jsonl');
+  fs.writeFileSync(rawPath, matchedRawLines.join('\n') + '\n');
+  console.log(`Raw telemetry: ${matchedRawLines.length} records saved to ${rawPath}`);
+}
+
+// Write machine-readable summary alongside the markdown report
+const summaryPath = path.join(runDir, 'cost-summary.json');
+fs.writeFileSync(summaryPath, JSON.stringify({
+  backend: metadata.backend,
+  level: metadata.level,
+  variant: metadata.variant,
+  rules: metadata.rules,
+  runIndex: metadata.runIndex,
+  sessionId: metadata.sessionId,
+  startedAt: metadata.startedAtUtc || metadata.startedAt,
+  endedAt: metadata.endedAtUtc || metadata.endedAt,
+  totalInputTokens: totalInput,
+  totalOutputTokens: totalOutput,
+  totalTokens,
+  cacheReadTokens: totalCacheRead,
+  cacheCreationTokens: totalCacheCreation,
+  totalCostUsd,
+  apiCalls: apiCalls.length,
+  totalDurationSec: apiCalls.reduce((sum, c) => sum + c.durationMs, 0) / 1000,
+}, null, 2));
+console.log(`Cost summary JSON: ${summaryPath}`);
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Collect the attributes of a parsed telemetry record into one flat object: top-level `attributes` first, then every
+ * logRecord under resourceLogs → scopeLogs. All logRecords share that object, so a later record's keys overwrite earlier ones.
+ */
+function extractAttributes(record) {
+  const attrs = {};
+
+  // Attributes sitting directly on the record (un-nested export shape)
+  if (record.attributes) {
+    flattenAttributes(record.attributes, attrs);
+  }
+
+  // Attributes nested in resourceLogs → scopeLogs → logRecords
+  if (record.resourceLogs) {
+    for (const rl of record.resourceLogs) {
+      for (const sl of rl.scopeLogs || []) {
+        for (const lr of sl.logRecords || []) {
+          // A string body is stored as the event type (e.g. "claude_code.api_request")
+          if (lr.body?.stringValue) {
+            attrs._eventType = lr.body.stringValue;
+          }
+          if (lr.attributes) {
+            flattenAttributes(lr.attributes, attrs);
+          }
+          if (lr.body?.kvlistValue?.values) {
+            flattenAttributes(lr.body.kvlistValue.values, attrs);
+          }
+        }
+      }
+    }
+  }
+
+  return attrs;
+}
+
+/**
+ * Extract resource-level attributes from an OTLP record.
+ * These contain OTEL_RESOURCE_ATTRIBUTES values (session.id, run.id).
+ */
+function extractResourceAttributes(record) {
+  const attrs = {};
+  if (record.resourceLogs) {
+    for (const rl of record.resourceLogs) {
+      if (rl.resource?.attributes) {
+        flattenAttributes(rl.resource.attributes, attrs);
+      }
+    }
+  }
+  return attrs;
+}
+
+function flattenAttributes(attrList, out) {
+  if (Array.isArray(attrList)) {
+    for (const kv of attrList) {
+      if (kv.key && kv.value) {
+        out[kv.key] = kv.value.stringValue || kv.value.intValue || kv.value.doubleValue || kv.value.boolValue;
+      }
+    }
+  } else if (typeof attrList === 'object') {
+    Object.assign(out, attrList);
+  }
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/.gitignore b/tools/llm-sequential-upgrade/perf-benchmark/.gitignore
new file mode 100644
index 00000000000..346f28a154a
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/.gitignore
@@ -0,0 +1,3 @@
+node_modules/
+results/
+*.log
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/README.md b/tools/llm-sequential-upgrade/perf-benchmark/README.md
new file mode 100644
index 00000000000..cca16d6e6c4
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/README.md
@@ -0,0 +1,68 @@
+# Perf Benchmark - PG vs STDB Chat Apps
+
+Runtime performance harness for the Level 12 chat apps the LLM built in the
+sequential upgrade benchmark. Measures messages-per-second throughput and
+latency so we have showcase numbers for the marketing one-pager.
+
+This is **not** a synthetic benchmark of PostgreSQL vs SpacetimeDB. It's a
+benchmark of *the apps the LLM built on each stack*, run as-is.
+
+## What it tests
+
+| Scenario | What it measures |
+|---|---|
+| `stress` | N writers flooding `send_message` for D seconds. Sustained msgs/sec + p99 latency. |
+| `realistic` | M users at human cadence (5-15s jitter) for D seconds. Sustained msgs/sec + latency under realistic load. |
+
+## Setup
+
+```bash
+npm install
+
+# Generate SpacetimeDB bindings against the target Level 12 app's backend.
+# Re-run this if you change which app you're benchmarking.
+spacetime generate --lang typescript --out-dir src/module_bindings \
+  --module-path ../sequential-upgrade/sequential-upgrade-20260406/spacetime/results/chat-app-20260406-153727/backend/spacetimedb
+```
+
+## Prerequisites for running
+
+The target apps must already be running:
+
+- **Postgres**: `cd <pg-app>/server && npm run dev` (Express on `:6001`),
+  plus the `exhaust-test-postgres-1` Docker container (port 6432).
+- **SpacetimeDB**: local `spacetime start` running, and the target module
+  must be published (the apps publish themselves automatically when generated).
+
+## Run
+
+```bash
+# PG stress, 30s, 20 writers
+npm run run -- --backend pg --scenario stress --writers 20 --duration 30
+
+# STDB stress, 30s, 50 writers
+npm run run -- --backend stdb --scenario stress --writers 50 --duration 30 \
+  --module chat-app-20260406-153727
+
+# Both throughput scenarios for one backend
+npm run run -- --backend pg --scenario all
+npm run run -- --backend stdb --scenario all --module chat-app-20260406-153727
+```
+
+Results land in `results/<timestamp>/<backend>-<scenario>.json`.
+Saved optimized-reference snapshots also live under
+`results/optimized-reference/`.
+Tracked reference implementations and methodology live in
+`optimized-reference/`.
+
+## Caveats
+
+- The PG app's `send_message` handler enforces a **500ms-per-user rate limit**
+  in application code. Each PG writer can therefore issue at most ~2 msgs/sec.
+  Throughput scales with writers, not with cadence. The harness paces writers
+  at ~510ms to avoid drops. SpacetimeDB has no equivalent limit, so its
+  per-writer ceiling is much higher.
+- Numbers reflect what shipped from the LLM, on a single dev machine, against
+  a local DB. They are not the theoretical ceiling of either backend.
+- Each connection in the harness uses the same Node process clock, so fan-out
+  latency is meaningful (no clock skew across machines).
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/METHODOLOGY.md b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/METHODOLOGY.md
new file mode 100644
index 00000000000..67813bb4120
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/METHODOLOGY.md
@@ -0,0 +1,46 @@
+# Optimized Reference Versions
+
+These files contain the optimized reference versions of the `sendMessage` handlers.
+There are only two comparison points:
+- Raw: the AI-generated baseline
+- Optimized: the one-pass improved implementation
+
+## What changed (STDB)
+
+Same features as AI-generated. Implementation changes only:
+- Membership check: use `userIdentity.filter(ctx.sender)` instead of `roomId.filter(roomId)` + `[...spread]` + `toHexString()` string allocation
+- Read receipt update: same fix (identity index, no spread, no string alloc)
+- Typing indicator cleanup: same fix
+- User existence check: kept
+- Room existence check: kept
+- Message insert: kept
+- All validation: kept
+
+## What changed (PG 20260406)
+
+Same features as AI-generated. Implementation changes only:
+- Rate limit: kept
+- Banned check: kept
+- Membership check: kept
+- Message insert: kept (blocking await, need the result)
+- Room emit: kept
+- lastSeen update: made non-blocking (fire without await)
+- Notification fanout query + loop: made non-blocking (fire without await)
+- Thread reply counting: kept
+- Typing indicator cleanup: kept
+- Activity tracking: kept
+
+## What changed (PG 20260403)
+
+Same features as AI-generated. Implementation changes only:
+- Message insert: kept (blocking await)
+- User lookup for username: made non-blocking (fire without await, emit after lookup resolves)
+- Room activity broadcast: kept
+- Response sent immediately after insert instead of after user lookup
+
+## Benchmark results (averaged across 2 runs)
+
+| Version | STDB avg | PG avg | Ratio |
+|------|----------|--------|-------|
+| Raw | 5,267 msgs/sec | 694 msgs/sec | 7.6x |
+| Optimized (this dir) | 25,278 msgs/sec | 1,139 msgs/sec | 22x |
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/pg-index-optimized.ts b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/pg-index-optimized.ts
new file mode 100644
index 00000000000..b23b51da0b4
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/pg-index-optimized.ts
@@ -0,0 +1,1591 @@
+﻿import express from 'express';
+import { createServer } from 'http';
+import { Server } from 'socket.io';
+import { drizzle } from 'drizzle-orm/node-postgres';
+import { Pool } from 'pg';
+import * as schema from './schema.js';
+import { eq, and, desc, inArray, lte, gt, isNotNull, isNull, or, count as drizzleCount } from 'drizzle-orm';
+import cors from 'cors';
+import dotenv from 'dotenv';
+
+dotenv.config();
+
+const app = express();
+const httpServer = createServer(app);
+
+const io = new Server(httpServer, {
+  cors: {
+    origin: 'http://localhost:6273',
+    methods: ['GET', 'POST'],
+  },
+});
+
+app.use(cors({ origin: 'http://localhost:6273' }));
+app.use(express.json());
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+const db = drizzle(pool, { schema });
+
+// In-memory typing state: roomId -> Map<userId, { timer, userName }>
+const typingState = new Map<number, Map<number, { timer: NodeJS.Timeout; userName: string }>>();
+
+// Activity tracking: roomId -> array of recent message timestamps (ms)
+const roomActivity = new Map<number, number[]>();
+const ACTIVITY_WINDOW_MS = 5 * 60 * 1000; // 5 minutes
+const HOT_THRESHOLD = 5; // 5+ messages in window = hot
+// Track last emitted level per room to detect changes
+const lastEmittedActivityLevel = new Map<number, 'hot' | 'active' | null>();
+
+// Prunes a room's timestamps to the activity window, stores them, and classifies the room
+function computeActivityLevel(roomId: number): 'hot' | 'active' | null {
+  const cutoff = Date.now() - ACTIVITY_WINDOW_MS;
+  const stillInWindow = (roomActivity.get(roomId) ?? []).filter((sentAt) => sentAt > cutoff);
+  roomActivity.set(roomId, stillInWindow);
+  const seen = stillInWindow.length;
+  return seen >= HOT_THRESHOLD ? 'hot' : seen >= 1 ? 'active' : null;
+}
+
+function recordRoomMessage(roomId: number) {
+  const timestamps = roomActivity.get(roomId) ?? [];
+  timestamps.push(Date.now());
+  roomActivity.set(roomId, timestamps);
+  const level = computeActivityLevel(roomId);
+  lastEmittedActivityLevel.set(roomId, level);
+  io.emit('room_activity_update', { roomId, level });
+}
+
+// Background job: periodically recalculate activity levels so badges reset when rooms go quiet
+setInterval(() => {
+  for (const roomId of roomActivity.keys()) {
+    const newLevel = computeActivityLevel(roomId);
+    const prev = lastEmittedActivityLevel.get(roomId) ?? null;
+    if (newLevel !== prev) {
+      lastEmittedActivityLevel.set(roomId, newLevel);
+      io.emit('room_activity_update', { roomId, level: newLevel });
+    }
+  }
+}, 30000); // check every 30 seconds
+
+// Socket to user mapping
+const connectedUsers = new Map<string, { id: number; name: string }>();
+const userSockets = new Map<number, string>();
+
+// Rate limiting: userId -> last message timestamp
+const lastMessageTime = new Map<number, number>();
+
+// ─── REST API ─────────────────────────────────────────────────────────────────
+
+// Create or get user by name (or create anonymous user)
+app.post('/api/users', async (req, res) => {
+  const { name, anonymous } = req.body as { name?: string; anonymous?: boolean };
+
+  // Anonymous join: generate a unique guest name
+  if (anonymous) {
+    try {
+      let guestName: string;
+      let attempts = 0;
+      do {
+        const suffix = Math.floor(1000 + Math.random() * 9000);
+        guestName = `Guest_${suffix}`;
+        const existing = await db.select({ id: schema.users.id }).from(schema.users).where(eq(schema.users.name, guestName));
+        if (existing.length === 0) break;
+        attempts++;
+      } while (attempts < 20);
+      const [user] = await db.insert(schema.users).values({ name: guestName!, isAnonymous: true }).returning();
+      return res.json(user);
+    } catch {
+      return res.status(500).json({ error: 'Failed to create anonymous user' });
+    }
+  }
+
+  if (!name || name.trim().length === 0) {
+    return res.status(400).json({ error: 'Name required' });
+  }
+  if (name.trim().length > 30) {
+    return res.status(400).json({ error: 'Name must be 30 characters or fewer' });
+  }
+
+  try {
+    let [user] = await db.select().from(schema.users).where(eq(schema.users.name, name.trim()));
+    if (!user) {
+      [user] = await db.insert(schema.users).values({ name: name.trim() }).returning();
+    }
+    res.json(user);
+  } catch {
+    res.status(500).json({ error: 'Failed to create user' });
+  }
+});
+
+// Register an anonymous user (give them a real name)
+app.post('/api/users/:id/register', async (req, res) => {
+  const userId = parseInt(req.params.id);
+  const { name } = req.body as { name?: string };
+  if (!name || name.trim().length === 0) {
+    return res.status(400).json({ error: 'Name required' });
+  }
+  if (name.trim().length > 30) {
+    return res.status(400).json({ error: 'Name must be 30 characters or fewer' });
+  }
+  // Disallow Guest_ prefix for registered users
+  if (name.trim().startsWith('Guest_')) {
+    return res.status(400).json({ error: 'Name cannot start with "Guest_"' });
+  }
+
+  try {
+    const [existing] = await db.select({ id: schema.users.id }).from(schema.users).where(eq(schema.users.name, name.trim()));
+    if (existing) {
+      return res.status(409).json({ error: 'Name already taken' });
+    }
+
+    const [user] = await db
+      .update(schema.users)
+      .set({ name: name.trim(), isAnonymous: false })
+      .where(eq(schema.users.id, userId))
+      .returning();
+
+    if (!user) return res.status(404).json({ error: 'User not found' });
+
+    // Broadcast name change so all clients can update their UI
+    io.emit('user_renamed', { userId, newName: name.trim() });
+
+    res.json(user);
+  } catch {
+    res.status(500).json({ error: 'Failed to register user' });
+  }
+});
+
+// Get online users (excludes invisible)
+app.get('/api/users/online', async (_req, res) => {
+  try {
+    const users = await db.select().from(schema.users).where(eq(schema.users.online, true));
+    // Filter out invisible users from the public online list
+    res.json(users.filter((u) => u.status !== 'invisible'));
+  } catch {
+    res.status(500).json({ error: 'Failed to get online users' });
+  }
+});
+
+// Get a single user by ID
+app.get('/api/users/:id', async (req, res) => {
+  const userId = parseInt(req.params.id);
+  if (!userId) return res.status(400).json({ error: 'Invalid user ID' });
+  try {
+    const [user] = await db
+      .select({
+        id: schema.users.id,
+        name: schema.users.name,
+        online: schema.users.online,
+        status: schema.users.status,
+        lastSeen: schema.users.lastSeen,
+        isAnonymous: schema.users.isAnonymous,
+      })
+      .from(schema.users)
+      .where(eq(schema.users.id, userId));
+    if (!user) return res.status(404).json({ error: 'User not found' });
+    res.json(user);
+  } catch {
+    res.status(500).json({ error: 'Failed to get user' });
+  }
+});
+
+// Get all users for presence list
+app.get('/api/users', async (_req, res) => {
+  try {
+    const users = await db
+      .select({
+        id: schema.users.id,
+        name: schema.users.name,
+        online: schema.users.online,
+        status: schema.users.status,
+        lastSeen: schema.users.lastSeen,
+        isAnonymous: schema.users.isAnonymous,
+      })
+      .from(schema.users)
+      .orderBy(schema.users.name);
+    res.json(users);
+  } catch {
+    res.status(500).json({ error: 'Failed to get users' });
+  }
+});
+
+// Update user status via REST
+app.patch('/api/users/:id/status', async (req, res) => {
+  const userId = parseInt(req.params.id);
+  const { status } = req.body as { status?: string };
+  const VALID_STATUSES = ['online', 'away', 'dnd', 'invisible'];
+  if (!status || !VALID_STATUSES.includes(status)) {
+    return res.status(400).json({ error: 'Invalid status' });
+  }
+
+  try {
+    const now = new Date();
+    const [user] = await db
+      .update(schema.users)
+      .set({ status, lastSeen: now })
+      .where(eq(schema.users.id, userId))
+      .returning();
+
+    if (!user) return res.status(404).json({ error: 'User not found' });
+
+    // Broadcast: invisible users appear offline to others
+    const broadcastStatus = status === 'invisible' ? 'offline' : status;
+    io.emit('user_status', {
+      userId,
+      name: user.name,
+      online: broadcastStatus !== 'offline',
+      status: broadcastStatus,
+      lastSeen: now,
+    });
+
+    res.json({ ok: true, status });
+  } catch {
+    res.status(500).json({ error: 'Failed to update status' });
+  }
+});
+
+// List rooms with unread counts (only public rooms + private rooms user is a member of)
+app.get('/api/rooms', async (req, res) => {
+  const userId = parseInt(req.query.userId as string);
+  if (!userId) return res.status(400).json({ error: 'userId required' });
+
+  try {
+    const memberships = await db
+      .select()
+      .from(schema.roomMembers)
+      .where(eq(schema.roomMembers.userId, userId));
+    const joinedRoomIds = memberships.map((m) => m.roomId);
+    const joinedRooms = new Set(joinedRoomIds);
+
+    // Fetch only rooms the user is allowed to see: public OR member of private
+    const rooms = joinedRoomIds.length > 0
+      ? await db.select().from(schema.rooms)
+          .where(or(eq(schema.rooms.isPrivate, false), inArray(schema.rooms.id, joinedRoomIds)))
+          .orderBy(schema.rooms.name)
+      : await db.select().from(schema.rooms)
+          .where(eq(schema.rooms.isPrivate, false))
+          .orderBy(schema.rooms.name);
+
+    // For DM rooms, get partner name
+    const dmPartnerNames: Record<number, string> = {};
+    const dmRoomIds = rooms.filter(r => r.isDm).map(r => r.id);
+    if (dmRoomIds.length > 0) {
+      const allDmMembers = await db
+        .select({ roomId: schema.roomMembers.roomId, userId: schema.roomMembers.userId, name: schema.users.name })
+        .from(schema.roomMembers)
+        .innerJoin(schema.users, eq(schema.roomMembers.userId, schema.users.id))
+        .where(inArray(schema.roomMembers.roomId, dmRoomIds));
+      for (const m of allDmMembers) {
+        if (m.userId !== userId) dmPartnerNames[m.roomId] = m.name;
+      }
+    }
+
+    const roomsWithCounts = await Promise.all(
+      rooms.map(async (room) => {
+        const result = await pool.query<{ count: string }>(
+          `SELECT COUNT(m.id)::int as count
+           FROM messages m
+           LEFT JOIN read_receipts rr ON rr.message_id = m.id AND rr.user_id = $1
+           WHERE m.room_id = $2 AND rr.message_id IS NULL`,
+          [userId, room.id]
+        );
+        return {
+          ...room,
+          unreadCount: parseInt(result.rows[0]?.count ?? '0'),
+          joined: joinedRooms.has(room.id),
+          dmPartnerName: dmPartnerNames[room.id] ?? null,
+        };
+      })
+    );
+
+    res.json(roomsWithCounts);
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to get rooms' });
+  }
+});
+
+// Get activity levels for all rooms.
+// Responds with a map of roomId -> 'hot' | 'active'; quiet rooms are omitted.
+// Delegates to computeActivityLevel so the window/threshold rules live in one
+// place (it also prunes timestamps that have aged out of the window).
+app.get('/api/rooms/activity', (_req, res) => {
+  const result: Record<number, 'hot' | 'active'> = {};
+  for (const roomId of roomActivity.keys()) {
+    const level = computeActivityLevel(roomId);
+    if (level) result[roomId] = level;
+  }
+  res.json(result);
+});
+
+// Create room
+app.post('/api/rooms', async (req, res) => {
+  const { name, userId, isPrivate } = req.body as { name?: string; userId?: number; isPrivate?: boolean };
+  if (!name || name.trim().length === 0) {
+    return res.status(400).json({ error: 'Room name required' });
+  }
+  if (name.trim().length > 50) {
+    return res.status(400).json({ error: 'Room name must be 50 characters or fewer' });
+  }
+
+  try {
+    const [room] = await db
+      .insert(schema.rooms)
+      .values({ name: name.trim(), isPrivate: isPrivate ?? false })
+      .returning();
+
+    if (userId) {
+      // Creator becomes admin
+      await db
+        .insert(schema.roomMembers)
+        .values({ userId, roomId: room.id, isAdmin: true })
+        .onConflictDoNothing();
+    }
+
+    const roomWithMeta = { ...room, unreadCount: 0, joined: userId ? true : false, dmPartnerName: null };
+    if (!room.isPrivate) {
+      // Only broadcast public rooms to all
+      io.emit('room_created', roomWithMeta);
+    } else if (userId) {
+      // Private room: only notify creator
+      const creatorSocketId = userSockets.get(userId);
+      if (creatorSocketId) {
+        io.to(creatorSocketId).emit('room_created', roomWithMeta);
+      }
+    }
+    res.json(roomWithMeta);
+  } catch (e: unknown) {
+    if ((e as { code?: string }).code === '23505') {
+      return res.status(400).json({ error: 'Room name already exists' });
+    }
+    res.status(500).json({ error: 'Failed to create room' });
+  }
+});
+
+// Join a room. Private rooms (including DMs) are invitation-only
+app.post('/api/rooms/:id/join', async (req, res) => {
+  const roomId = parseInt(req.params.id);
+  const { userId } = req.body as { userId: number };
+
+  try {
+    const [room] = await db.select().from(schema.rooms).where(eq(schema.rooms.id, roomId));
+    if (!room) return res.status(404).json({ error: 'Room not found' });
+    if (room.isPrivate) {
+      // Only existing members (added by invitation or DM creation) may re-join a private room
+      const [member] = await db.select().from(schema.roomMembers)
+        .where(and(eq(schema.roomMembers.userId, userId), eq(schema.roomMembers.roomId, roomId)));
+      if (!member) return res.status(403).json({ error: 'This room is invitation-only' });
+    }
+
+    const [banned] = await db.select().from(schema.bannedUsers)
+      .where(and(eq(schema.bannedUsers.userId, userId), eq(schema.bannedUsers.roomId, roomId)));
+    if (banned) return res.status(403).json({ error: 'You are banned from this room' });
+    await db.insert(schema.roomMembers).values({ userId, roomId }).onConflictDoNothing();
+    const [user] = await db.select().from(schema.users).where(eq(schema.users.id, userId));
+    if (user) {
+      io.to(`room:${roomId}`).emit('member_joined', { userId, name: user.name, isAdmin: false, roomId });
+    }
+    res.json({ ok: true });
+  } catch {
+    res.status(500).json({ error: 'Failed to join room' });
+  }
+});
+
+// Get room members
+app.get('/api/rooms/:id/members', async (req, res) => {
+  const roomId = parseInt(req.params.id);
+  try {
+    const members = await db
+      .select({
+        userId: schema.roomMembers.userId,
+        isAdmin: schema.roomMembers.isAdmin,
+        name: schema.users.name,
+      })
+      .from(schema.roomMembers)
+      .innerJoin(schema.users, eq(schema.roomMembers.userId, schema.users.id))
+      .where(eq(schema.roomMembers.roomId, roomId));
+    res.json(members);
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to get members' });
+  }
+});
+
+// Kick user from room (admin only)
+app.post('/api/rooms/:id/kick', async (req, res) => {
+  const roomId = parseInt(req.params.id);
+  const { adminId, targetUserId } = req.body as { adminId: number; targetUserId: number };
+
+  try {
+    // Verify requester is admin
+    const [adminMember] = await db
+      .select()
+      .from(schema.roomMembers)
+      .where(and(eq(schema.roomMembers.userId, adminId), eq(schema.roomMembers.roomId, roomId)));
+    if (!adminMember?.isAdmin) return res.status(403).json({ error: 'Only admins can kick users' });
+
+    // Cannot kick another admin
+    const [targetMember] = await db
+      .select()
+      .from(schema.roomMembers)
+      .where(and(eq(schema.roomMembers.userId, targetUserId), eq(schema.roomMembers.roomId, roomId)));
+    if (targetMember?.isAdmin) return res.status(403).json({ error: 'Cannot kick an admin' });
+
+    // Remove from room and ban
+    await db
+      .delete(schema.roomMembers)
+      .where(and(eq(schema.roomMembers.userId, targetUserId), eq(schema.roomMembers.roomId, roomId)));
+    await db
+      .insert(schema.bannedUsers)
+      .values({ userId: targetUserId, roomId })
+      .onConflictDoNothing();
+
+    // Notify the room (for other members to update their panel)
+    io.to(`room:${roomId}`).emit('user_kicked', { userId: targetUserId, roomId });
+
+    // Emit directly to the kicked user's socket so they are redirected
+    // even if their socket is not (yet) in the socket room
+    const kickedSocketId = userSockets.get(targetUserId);
+    if (kickedSocketId) {
+      const kickedSocket = io.sockets.sockets.get(kickedSocketId);
+      if (kickedSocket) {
+        kickedSocket.emit('kicked_from_room', { roomId });
+        kickedSocket.leave(`room:${roomId}`);
+      }
+    }
+
+    res.json({ ok: true });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to kick user' });
+  }
+});
+
+// Promote user to admin (admin only); 404 if the target is not a room member
+app.post('/api/rooms/:id/promote', async (req, res) => {
+  const roomId = parseInt(req.params.id);
+  const { adminId, targetUserId } = req.body as { adminId: number; targetUserId: number };
+
+  try {
+    // Verify requester is admin
+    const [adminMember] = await db
+      .select()
+      .from(schema.roomMembers)
+      .where(and(eq(schema.roomMembers.userId, adminId), eq(schema.roomMembers.roomId, roomId)));
+    if (!adminMember?.isAdmin) return res.status(403).json({ error: 'Only admins can promote users' });
+
+    // Promote target; an empty returning() means the target is not a member of the room
+    const promoted = await db
+      .update(schema.roomMembers)
+      .set({ isAdmin: true })
+      .where(and(eq(schema.roomMembers.userId, targetUserId), eq(schema.roomMembers.roomId, roomId)))
+      .returning();
+    if (promoted.length === 0) return res.status(404).json({ error: 'User is not a member of this room' });
+    io.to(`room:${roomId}`).emit('user_promoted', { userId: targetUserId, roomId });
+    res.json({ ok: true });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to promote user' });
+  }
+});
+
+// Leave room
+app.post('/api/rooms/:id/leave', async (req, res) => {
+  const roomId = parseInt(req.params.id);
+  const { userId } = req.body as { userId: number };
+
+  try {
+    await db
+      .delete(schema.roomMembers)
+      .where(
+        and(eq(schema.roomMembers.userId, userId), eq(schema.roomMembers.roomId, roomId))
+      );
+
+    io.to(`room:${roomId}`).emit('member_left', { userId, roomId });
+
+    res.json({ ok: true });
+  } catch {
+    res.status(500).json({ error: 'Failed to leave room' });
+  }
+});
+
+// Invite a user to a private room (admin only)
+app.post('/api/rooms/:id/invite', async (req, res) => {
+  const roomId = parseInt(req.params.id);
+  const { adminId, inviteeName } = req.body as { adminId: number; inviteeName: string };
+
+  if (!inviteeName?.trim()) return res.status(400).json({ error: 'inviteeName required' });
+
+  try {
+    // Verify requester is admin
+    const [adminMember] = await db
+      .select()
+      .from(schema.roomMembers)
+      .where(and(eq(schema.roomMembers.userId, adminId), eq(schema.roomMembers.roomId, roomId)));
+    if (!adminMember?.isAdmin) return res.status(403).json({ error: 'Only admins can invite users' });
+
+    // Find invitee by name
+    const [invitee] = await db
+      .select()
+      .from(schema.users)
+      .where(eq(schema.users.name, inviteeName.trim()));
+    if (!invitee) return res.status(404).json({ error: 'User not found' });
+
+    // Check if already a member
+    const [existing] = await db
+      .select()
+      .from(schema.roomMembers)
+      .where(and(eq(schema.roomMembers.userId, invitee.id), eq(schema.roomMembers.roomId, roomId)));
+    if (existing) return res.status(400).json({ error: 'User is already a member' });
+
+    // Check if already banned
+    const [banned] = await db
+      .select()
+      .from(schema.bannedUsers)
+      .where(and(eq(schema.bannedUsers.userId, invitee.id), eq(schema.bannedUsers.roomId, roomId)));
+    if (banned) return res.status(400).json({ error: 'User is banned from this room' });
+
+    // Check if pending invitation already exists
+    const [pendingInv] = await db
+      .select()
+      .from(schema.roomInvitations)
+      .where(and(
+        eq(schema.roomInvitations.roomId, roomId),
+        eq(schema.roomInvitations.inviteeId, invitee.id),
+        eq(schema.roomInvitations.status, 'pending')
+      ));
+    if (pendingInv) return res.status(400).json({ error: 'Invitation already pending' });
+
+    const [room] = await db.select().from(schema.rooms).where(eq(schema.rooms.id, roomId));
+    const [inviter] = await db.select().from(schema.users).where(eq(schema.users.id, adminId));
+
+    const [invitation] = await db
+      .insert(schema.roomInvitations)
+      .values({ roomId, inviterId: adminId, inviteeId: invitee.id })
+      .returning();
+
+    // Notify invitee via socket
+    const inviteeSocketId = userSockets.get(invitee.id);
+    if (inviteeSocketId) {
+      io.to(inviteeSocketId).emit('invitation_received', {
+        id: invitation.id,
+        roomId,
+        roomName: room?.name ?? '',
+        inviterName: inviter?.name ?? '',
+        createdAt: invitation.createdAt,
+      });
+    }
+
+    res.json({ ok: true, invitation });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to invite user' });
+  }
+});
+
+// Get pending invitations for a user
+app.get('/api/invitations', async (req, res) => {
+  const userId = parseInt(req.query.userId as string);
+  if (!userId) return res.status(400).json({ error: 'userId required' });
+
+  try {
+    const invitations = await pool.query<{
+      id: number; room_id: number; room_name: string;
+      inviter_id: number; inviter_name: string; created_at: Date;
+    }>(
+      `SELECT ri.id, ri.room_id, r.name as room_name, ri.inviter_id, u.name as inviter_name, ri.created_at
+       FROM room_invitations ri
+       JOIN rooms r ON r.id = ri.room_id
+       JOIN users u ON u.id = ri.inviter_id
+       WHERE ri.invitee_id = $1 AND ri.status = 'pending'
+       ORDER BY ri.created_at DESC`,
+      [userId]
+    );
+    res.json(invitations.rows.map(r => ({
+      id: r.id, roomId: r.room_id, roomName: r.room_name,
+      inviterId: r.inviter_id, inviterName: r.inviter_name, createdAt: r.created_at,
+    })));
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to get invitations' });
+  }
+});
+
+// Accept an invitation
+app.post('/api/invitations/:id/accept', async (req, res) => {
+  const invitationId = parseInt(req.params.id);
+  const { userId } = req.body as { userId: number };
+
+  try {
+    const [invitation] = await db
+      .update(schema.roomInvitations)
+      .set({ status: 'accepted' })
+      .where(and(
+        eq(schema.roomInvitations.id, invitationId),
+        eq(schema.roomInvitations.inviteeId, userId),
+        eq(schema.roomInvitations.status, 'pending')
+      ))
+      .returning();
+
+    if (!invitation) return res.status(404).json({ error: 'Invitation not found' });
+
+    // Add user to room
+    await db
+      .insert(schema.roomMembers)
+      .values({ userId, roomId: invitation.roomId })
+      .onConflictDoNothing();
+
+    const [room] = await db.select().from(schema.rooms).where(eq(schema.rooms.id, invitation.roomId));
+    const [user] = await db.select().from(schema.users).where(eq(schema.users.id, userId));
+
+    // Notify room members
+    io.to(`room:${invitation.roomId}`).emit('member_joined', {
+      userId, name: user?.name ?? '', isAdmin: false, roomId: invitation.roomId,
+    });
+
+    // Get DM partner name if DM
+    let dmPartnerName: string | null = null;
+    if (room?.isDm) {
+      const members = await db
+        .select({ userId: schema.roomMembers.userId, name: schema.users.name })
+        .from(schema.roomMembers)
+        .innerJoin(schema.users, eq(schema.roomMembers.userId, schema.users.id))
+        .where(eq(schema.roomMembers.roomId, invitation.roomId));
+      const partner = members.find(m => m.userId !== userId);
+      dmPartnerName = partner?.name ?? null;
+    }
+
+    // Send room info to the new member
+    const roomWithMeta = {
+      ...room,
+      unreadCount: 0,
+      joined: true,
+      dmPartnerName,
+    };
+    const userSocketId = userSockets.get(userId);
+    if (userSocketId) {
+      io.to(userSocketId).emit('room_created', roomWithMeta);
+    }
+
+    res.json({ ok: true, room: roomWithMeta });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to accept invitation' });
+  }
+});
+
+// Decline an invitation
+app.post('/api/invitations/:id/decline', async (req, res) => {
+  const invitationId = parseInt(req.params.id);
+  const { userId } = req.body as { userId: number };
+
+  try {
+    const [invitation] = await db
+      .update(schema.roomInvitations)
+      .set({ status: 'declined' })
+      .where(and(
+        eq(schema.roomInvitations.id, invitationId),
+        eq(schema.roomInvitations.inviteeId, userId),
+        eq(schema.roomInvitations.status, 'pending')
+      ))
+      .returning();
+
+    if (!invitation) return res.status(404).json({ error: 'Invitation not found' });
+    res.json({ ok: true });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to decline invitation' });
+  }
+});
+
+// Create or get DM room between two users.
+// Body: { userId, partnerId }. Responds with the room plus unreadCount, joined and
+// dmPartnerName. 400 when the ids are missing or equal, 404 when the partner does
+// not exist, 500 on any other failure.
+// A DM that either user has left is reopened rather than re-created.
+app.post('/api/dm', async (req, res) => {
+  const { userId, partnerId } = req.body as { userId: number; partnerId: number };
+  if (!userId || !partnerId || userId === partnerId) {
+    return res.status(400).json({ error: 'userId and partnerId required and must be different' });
+  }
+
+  // A DM room is identified by its canonical name rather than by its current member
+  // list. A membership-count lookup stops matching once either user leaves the room,
+  // and re-creating the room then collides with the name of the room that still exists.
+  const dmName = `dm:${Math.min(userId, partnerId)}-${Math.max(userId, partnerId)}`;
+
+  // Looks up the DM room for this pair by name; undefined when there is none
+  const findDmRoom = async () => {
+    const [found] = await db
+      .select()
+      .from(schema.rooms)
+      .where(and(eq(schema.rooms.name, dmName), eq(schema.rooms.isDm, true)));
+    return found;
+  };
+
+  // Idempotently (re-)adds both participants to the room
+  const ensureMembers = async (roomId: number) => {
+    await db.insert(schema.roomMembers).values({ userId, roomId }).onConflictDoNothing();
+    await db.insert(schema.roomMembers).values({ userId: partnerId, roomId }).onConflictDoNothing();
+  };
+
+  try {
+    const [partner] = await db.select().from(schema.users).where(eq(schema.users.id, partnerId));
+    if (!partner) return res.status(404).json({ error: 'Partner not found' });
+
+    // Reuse the existing DM room, restoring any participant who left it
+    const existing = await findDmRoom();
+    if (existing) {
+      await ensureMembers(existing.id);
+      return res.json({ ...existing, unreadCount: 0, joined: true, dmPartnerName: partner.name });
+    }
+
+    // Create DM room. With a unique room name (assumed; the 23505 handling in the
+    // room-creation route implies it), onConflictDoNothing() turns a concurrent create
+    // into an empty result instead of an error, and the room is then looked up again.
+    const [created] = await db
+      .insert(schema.rooms)
+      .values({ name: dmName, isPrivate: true, isDm: true })
+      .onConflictDoNothing()
+      .returning();
+
+    const room = created ?? (await findDmRoom());
+    // Nothing inserted and no DM room found: the name is held by an ordinary room
+    if (!room) throw new Error(`Room name "${dmName}" is already used by a non-DM room`);
+    await ensureMembers(room.id);
+
+    const roomForUser = { ...room, unreadCount: 0, joined: true, dmPartnerName: partner.name };
+    // Lost the creation race: the request that created the room sends the notifications
+    if (!created) return res.json(roomForUser);
+
+    const [user] = await db.select().from(schema.users).where(eq(schema.users.id, userId));
+    const roomForPartner = { ...room, unreadCount: 0, joined: true, dmPartnerName: user?.name ?? '' };
+
+    // Notify requester
+    const userSocketId = userSockets.get(userId);
+    if (userSocketId) io.to(userSocketId).emit('room_created', roomForUser);
+
+    // Notify partner
+    const partnerSocketId = userSockets.get(partnerId);
+    if (partnerSocketId) io.to(partnerSocketId).emit('room_created', roomForPartner);
+
+    res.json(roomForUser);
+  } catch (e: unknown) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to create DM' });
+  }
+});
+
+// Get messages for a room (marks all as read for userId)
+app.get('/api/rooms/:id/messages', async (req, res) => {
+  const roomId = parseInt(req.params.id);
+  const userId = parseInt(req.query.userId as string);
+
+  try {
+    // Verify user is a member and not banned (NOTE: skipped entirely when userId is absent)
+    if (userId) {
+      const [banned] = await db
+        .select()
+        .from(schema.bannedUsers)
+        .where(and(eq(schema.bannedUsers.userId, userId), eq(schema.bannedUsers.roomId, roomId)));
+      if (banned) return res.status(403).json({ error: 'You are banned from this room' });
+
+      const [membership] = await db
+        .select()
+        .from(schema.roomMembers)
+        .where(and(eq(schema.roomMembers.userId, userId), eq(schema.roomMembers.roomId, roomId)));
+      if (!membership) return res.status(403).json({ error: 'You are not a member of this room' });
+    }
+
+    // Newest 200 top-level, unexpired messages: query newest-first, then restore chronological order
+    const latest = await db
+      .select({
+        id: schema.messages.id,
+        roomId: schema.messages.roomId,
+        userId: schema.messages.userId,
+        content: schema.messages.content,
+        expiresAt: schema.messages.expiresAt,
+        editedAt: schema.messages.editedAt,
+        parentMessageId: schema.messages.parentMessageId,
+        createdAt: schema.messages.createdAt,
+        userName: schema.users.name,
+      })
+      .from(schema.messages)
+      .innerJoin(schema.users, eq(schema.messages.userId, schema.users.id))
+      .where(
+        and(
+          eq(schema.messages.roomId, roomId),
+          isNull(schema.messages.parentMessageId),
+          or(isNull(schema.messages.expiresAt), gt(schema.messages.expiresAt, new Date()))
+        )
+      )
+      .orderBy(desc(schema.messages.createdAt), desc(schema.messages.id))
+      .limit(200);
+    const msgs = latest.reverse();
+
+    // Get reply counts for top-level messages
+    let replyCountByMessage: Record<number, number> = {};
+    if (msgs.length > 0) {
+      const replyCounts = await pool.query<{ parent_message_id: number; count: string }>(
+        `SELECT parent_message_id, COUNT(*)::int as count FROM messages WHERE room_id = $1 AND parent_message_id IS NOT NULL GROUP BY parent_message_id`,
+        [roomId]
+      );
+      for (const row of replyCounts.rows) {
+        replyCountByMessage[row.parent_message_id] = parseInt(row.count);
+      }
+    }
+
+    // Get read receipts for these messages
+    let receiptsByMessage: Record<number, { userId: number; userName: string }[]> = {};
+    if (msgs.length > 0) {
+      const receipts = await db
+        .select({
+          messageId: schema.readReceipts.messageId,
+          userId: schema.readReceipts.userId,
+          userName: schema.users.name,
+        })
+        .from(schema.readReceipts)
+        .innerJoin(schema.users, eq(schema.readReceipts.userId, schema.users.id))
+        .where(inArray(schema.readReceipts.messageId, msgs.map((m) => m.id)));
+
+      for (const r of receipts) {
+        if (!receiptsByMessage[r.messageId]) receiptsByMessage[r.messageId] = [];
+        receiptsByMessage[r.messageId].push({ userId: r.userId, userName: r.userName });
+      }
+    }
+
+    // Get reactions for these messages
+    let reactionsByMessage: Record<number, { emoji: string; userId: number; userName: string }[]> = {};
+    if (msgs.length > 0) {
+      const reactions = await db
+        .select({
+          messageId: schema.messageReactions.messageId,
+          userId: schema.messageReactions.userId,
+          emoji: schema.messageReactions.emoji,
+          userName: schema.users.name,
+        })
+        .from(schema.messageReactions)
+        .innerJoin(schema.users, eq(schema.messageReactions.userId, schema.users.id))
+        .where(inArray(schema.messageReactions.messageId, msgs.map((m) => m.id)));
+
+      for (const r of reactions) {
+        if (!reactionsByMessage[r.messageId]) reactionsByMessage[r.messageId] = [];
+        reactionsByMessage[r.messageId].push({ emoji: r.emoji, userId: r.userId, userName: r.userName });
+      }
+    }
+
+    const result = msgs.map((m) => ({
+      ...m,
+      readBy: (receiptsByMessage[m.id] ?? []).filter((r) => r.userId !== m.userId),
+      reactions: reactionsByMessage[m.id] ?? [],
+      replyCount: replyCountByMessage[m.id] ?? 0,
+    }));
+
+    // Mark all messages as read for this user and broadcast
+    if (userId && msgs.length > 0) {
+      const [user] = await db.select().from(schema.users).where(eq(schema.users.id, userId));
+      // One bulk insert instead of a round trip per message; rows that already exist are
+      // skipped by onConflictDoNothing(), so returning() lists only the newly read ids.
+      const inserted = await db
+        .insert(schema.readReceipts)
+        .values(msgs.map((m) => ({ userId, messageId: m.id })))
+        .onConflictDoNothing()
+        .returning({ messageId: schema.readReceipts.messageId });
+      const newlyRead = inserted.map((r) => r.messageId);
+
+      if (newlyRead.length > 0 && user) {
+        io.to(`room:${roomId}`).emit('bulk_read', {
+          messageIds: newlyRead,
+          userId,
+          userName: user.name,
+        });
+      }
+    }
+
+    res.json(result);
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to get messages' });
+  }
+});
+
+// ─── Message Threading ────────────────────────────────────────────────────────
+
+// Get thread (parent message + all replies)
+app.get('/api/messages/:id/thread', async (req, res) => {
+  // Returns `{ parent, replies }` for one thread. When a `userId` query
+  // parameter is supplied, a read receipt is recorded for every returned reply.
+  const parentMessageId = Number.parseInt(req.params.id, 10);
+  const userId = Number.parseInt(req.query.userId as string, 10);
+
+  // Reject a malformed id up front instead of passing NaN to the query.
+  if (!Number.isInteger(parentMessageId)) {
+    return res.status(400).json({ error: 'Invalid message id' });
+  }
+
+  // Column projection shared by the parent lookup and the replies query.
+  const messageColumns = {
+    id: schema.messages.id,
+    roomId: schema.messages.roomId,
+    userId: schema.messages.userId,
+    content: schema.messages.content,
+    expiresAt: schema.messages.expiresAt,
+    editedAt: schema.messages.editedAt,
+    parentMessageId: schema.messages.parentMessageId,
+    createdAt: schema.messages.createdAt,
+    userName: schema.users.name,
+  };
+
+  try {
+    // Get parent message
+    const [parentRaw] = await db
+      .select(messageColumns)
+      .from(schema.messages)
+      .innerJoin(schema.users, eq(schema.messages.userId, schema.users.id))
+      .where(eq(schema.messages.id, parentMessageId));
+
+    if (!parentRaw) return res.status(404).json({ error: 'Message not found' });
+
+    // Get replies, ordered by creation time and capped at 200 rows
+    const replies = await db
+      .select(messageColumns)
+      .from(schema.messages)
+      .innerJoin(schema.users, eq(schema.messages.userId, schema.users.id))
+      .where(eq(schema.messages.parentMessageId, parentMessageId))
+      .orderBy(schema.messages.createdAt)
+      .limit(200);
+
+    // Get reactions for replies, grouped by message id
+    const reactionsByMessage: Record<number, { emoji: string; userId: number; userName: string }[]> = {};
+    if (replies.length > 0) {
+      const reactions = await db
+        .select({
+          messageId: schema.messageReactions.messageId,
+          userId: schema.messageReactions.userId,
+          emoji: schema.messageReactions.emoji,
+          userName: schema.users.name,
+        })
+        .from(schema.messageReactions)
+        .innerJoin(schema.users, eq(schema.messageReactions.userId, schema.users.id))
+        .where(inArray(schema.messageReactions.messageId, replies.map((r) => r.id)));
+      for (const r of reactions) {
+        if (!reactionsByMessage[r.messageId]) reactionsByMessage[r.messageId] = [];
+        reactionsByMessage[r.messageId].push({ emoji: r.emoji, userId: r.userId, userName: r.userName });
+      }
+    }
+
+    // The parent is returned without read receipts or reactions; only replies
+    // carry their reactions.
+    const replyCount = replies.length;
+    const parent = { ...parentRaw, readBy: [], reactions: [], replyCount };
+    const replyMessages = replies.map((r) => ({
+      ...r,
+      readBy: [],
+      reactions: reactionsByMessage[r.id] ?? [],
+      replyCount: 0,
+    }));
+
+    // Mark replies as read for the requesting user. One batched insert replaces
+    // a round trip per reply; receipts that already exist are ignored.
+    if (userId && replies.length > 0) {
+      await db
+        .insert(schema.readReceipts)
+        .values(replies.map((reply) => ({ userId, messageId: reply.id })))
+        .onConflictDoNothing();
+    }
+
+    res.json({ parent, replies: replyMessages });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to get thread' });
+  }
+});
+
+// ─── Message Editing ──────────────────────────────────────────────────────────
+
+// Edit a message (owner only)
+app.patch('/api/messages/:id', async (req, res) => {
+  // Owner-only edit: stores the new content, archives the previous content in
+  // the edit history, then broadcasts `message_edited` to the message's room.
+  const messageId = Number.parseInt(req.params.id, 10);
+  const { userId, content } = (req.body ?? {}) as { userId?: number; content?: string };
+
+  if (!Number.isInteger(messageId)) {
+    return res.status(400).json({ error: 'Invalid message id' });
+  }
+  if (!userId || !content?.trim()) {
+    return res.status(400).json({ error: 'userId and content required' });
+  }
+  if (content.trim().length > 2000) {
+    return res.status(400).json({ error: 'Content too long' });
+  }
+
+  try {
+    const [message] = await db.select().from(schema.messages).where(eq(schema.messages.id, messageId));
+    if (!message) return res.status(404).json({ error: 'Message not found' });
+    if (message.userId !== userId) return res.status(403).json({ error: 'Cannot edit another user\'s message' });
+
+    // The update and the history insert share one transaction so a failure
+    // cannot leave a history row without the matching edit, or the reverse.
+    const updated = await db.transaction(async (tx) => {
+      const [row] = await tx
+        .update(schema.messages)
+        .set({ content: content.trim(), editedAt: new Date() })
+        .where(eq(schema.messages.id, messageId))
+        .returning();
+      // Only archive the old content when the message row was actually updated.
+      if (row) {
+        await tx.insert(schema.messageEdits).values({ messageId, content: message.content });
+      }
+      return row;
+    });
+
+    // The row can disappear between the lookup and the update (this file also
+    // deletes expired messages on a timer), so treat "no row updated" as 404.
+    if (!updated) return res.status(404).json({ error: 'Message not found' });
+
+    const [user] = await db.select().from(schema.users).where(eq(schema.users.id, userId));
+
+    const payload = {
+      messageId,
+      content: updated.content,
+      editedAt: updated.editedAt,
+      userName: user?.name ?? '',
+    };
+    io.to(`room:${message.roomId}`).emit('message_edited', payload);
+
+    res.json({ ok: true, ...payload });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to edit message' });
+  }
+});
+
+// Get edit history for a message
+app.get('/api/messages/:id/history', async (req, res) => {
+  // Returns the stored edit-history rows for one message, ordered by `editedAt`.
+  const messageId = Number.parseInt(req.params.id, 10);
+
+  // A non-numeric id is a client error, not a server failure.
+  if (!Number.isInteger(messageId)) {
+    return res.status(400).json({ error: 'Invalid message id' });
+  }
+
+  try {
+    const history = await db
+      .select()
+      .from(schema.messageEdits)
+      .where(eq(schema.messageEdits.messageId, messageId))
+      .orderBy(schema.messageEdits.editedAt);
+    res.json(history);
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to get edit history' });
+  }
+});
+
+// ─── Reactions ────────────────────────────────────────────────────────────────
+
+// Toggle a reaction (add if not present, remove if already present)
+app.post('/api/messages/:id/reactions', async (req, res) => {
+  // Toggles one (user, message, emoji) reaction and broadcasts
+  // `reaction_updated` to the message's room. Responds with `{ ok, added }`,
+  // where `added` is false when an existing reaction was removed.
+  const messageId = Number.parseInt(req.params.id, 10);
+  const { userId, emoji } = (req.body ?? {}) as { userId?: number; emoji?: string };
+
+  const ALLOWED_EMOJIS = ['👍', '❤️', '😂', '😮', '😢'];
+  if (!Number.isInteger(messageId)) {
+    return res.status(400).json({ error: 'Invalid message id' });
+  }
+  if (!userId || !emoji || !ALLOWED_EMOJIS.includes(emoji)) {
+    return res.status(400).json({ error: 'userId and valid emoji required' });
+  }
+
+  try {
+    const [message] = await db.select().from(schema.messages).where(eq(schema.messages.id, messageId));
+    if (!message) return res.status(404).json({ error: 'Message not found' });
+
+    const [user] = await db.select().from(schema.users).where(eq(schema.users.id, userId));
+    if (!user) return res.status(404).json({ error: 'User not found' });
+
+    // Try the "toggle off" path first. DELETE ... RETURNING reports in a single
+    // statement whether the reaction existed, which removes the separate SELECT
+    // and the gap between checking for the row and acting on the answer.
+    const removed = await db
+      .delete(schema.messageReactions)
+      .where(
+        and(
+          eq(schema.messageReactions.userId, userId),
+          eq(schema.messageReactions.messageId, messageId),
+          eq(schema.messageReactions.emoji, emoji)
+        )
+      )
+      .returning();
+
+    const added = removed.length === 0;
+    if (added) {
+      // Nothing was removed, so this is a "toggle on". A concurrent duplicate
+      // insert is ignored by the conflict clause.
+      await db
+        .insert(schema.messageReactions)
+        .values({ userId, messageId, emoji })
+        .onConflictDoNothing();
+    }
+
+    // Broadcast to the room
+    const payload = { messageId, userId, userName: user.name, emoji, added };
+    io.to(`room:${message.roomId}`).emit('reaction_updated', payload);
+
+    res.json({ ok: true, added });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to toggle reaction' });
+  }
+});
+
+// ─── Scheduled Messages ────────────────────────────────────────────────────────
+
+// Create a scheduled message
+app.post('/api/scheduled-messages', async (req, res) => {
+  // Queues a message for later delivery. All four fields are required,
+  // `scheduledFor` must parse to a time in the future, and the trimmed content
+  // may be at most 2000 characters. Responds with the stored row.
+  type ScheduleRequest = {
+    roomId?: number;
+    userId?: number;
+    content?: string;
+    scheduledFor?: string;
+  };
+  const { roomId, userId, content, scheduledFor } = req.body as ScheduleRequest;
+
+  const missingField = !roomId || !userId || !content?.trim() || !scheduledFor;
+  if (missingField) {
+    return res.status(400).json({ error: 'roomId, userId, content, and scheduledFor are required' });
+  }
+
+  const deliverAt = new Date(scheduledFor);
+  const isFuture = !isNaN(deliverAt.getTime()) && deliverAt > new Date();
+  if (!isFuture) {
+    return res.status(400).json({ error: 'scheduledFor must be a future date' });
+  }
+
+  const text = content.trim();
+  if (text.length > 2000) {
+    return res.status(400).json({ error: 'Content too long' });
+  }
+
+  try {
+    const rows = await db
+      .insert(schema.scheduledMessages)
+      .values({ roomId, userId, content: text, scheduledFor: deliverAt })
+      .returning();
+    res.json(rows[0]);
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to schedule message' });
+  }
+});
+
+// Get pending scheduled messages for a user
+app.get('/api/scheduled-messages', async (req, res) => {
+  // Lists one user's scheduled messages that are neither sent nor cancelled,
+  // each joined with its room name and ordered by `scheduledFor`.
+  const userId = parseInt(req.query.userId as string);
+  if (!userId) return res.status(400).json({ error: 'userId required' });
+
+  try {
+    const pending = schema.scheduledMessages;
+    const ownedAndPending = and(
+      eq(pending.userId, userId),
+      eq(pending.sent, false),
+      eq(pending.cancelled, false)
+    );
+
+    const rows = await db
+      .select({
+        id: pending.id,
+        roomId: pending.roomId,
+        userId: pending.userId,
+        content: pending.content,
+        scheduledFor: pending.scheduledFor,
+        createdAt: pending.createdAt,
+        roomName: schema.rooms.name,
+      })
+      .from(pending)
+      .innerJoin(schema.rooms, eq(pending.roomId, schema.rooms.id))
+      .where(ownedAndPending)
+      .orderBy(pending.scheduledFor);
+
+    res.json(rows);
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to get scheduled messages' });
+  }
+});
+
+// Cancel a scheduled message
+app.delete('/api/scheduled-messages/:id', async (req, res) => {
+  // Cancels a scheduled message that belongs to the caller and is still
+  // pending (not sent, not already cancelled).
+  //
+  // DELETE requests are often sent without a body, so `req.body` may be
+  // undefined; `userId` is therefore read from the JSON body when present and
+  // from the query string otherwise, and both ids are validated before use.
+  const id = Number.parseInt(req.params.id, 10);
+  const userId = Number(req.body?.userId ?? req.query.userId);
+
+  if (!Number.isInteger(id) || !Number.isInteger(userId)) {
+    return res.status(400).json({ error: 'id and userId required' });
+  }
+
+  try {
+    const [updated] = await db
+      .update(schema.scheduledMessages)
+      .set({ cancelled: true })
+      .where(
+        and(
+          eq(schema.scheduledMessages.id, id),
+          eq(schema.scheduledMessages.userId, userId),
+          eq(schema.scheduledMessages.sent, false),
+          eq(schema.scheduledMessages.cancelled, false)
+        )
+      )
+      .returning();
+
+    // No row matched: wrong id, wrong owner, or no longer pending.
+    if (!updated) return res.status(404).json({ error: 'Scheduled message not found or already sent/cancelled' });
+    res.json({ ok: true });
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to cancel scheduled message' });
+  }
+});
+
+// Background job: send due scheduled messages every 10 seconds
+setInterval(async () => {
+  // Delivers every scheduled message whose time has come. Each row is claimed
+  // with a conditional UPDATE (`sent = false` -> `true`) before it is sent, and
+  // rows whose claim matches nothing are skipped, so a row already claimed by
+  // another run is not delivered twice.
+  try {
+    const due = await db
+      .select()
+      .from(schema.scheduledMessages)
+      .where(
+        and(
+          eq(schema.scheduledMessages.sent, false),
+          eq(schema.scheduledMessages.cancelled, false),
+          lte(schema.scheduledMessages.scheduledFor, new Date())
+        )
+      );
+
+    for (const scheduled of due) {
+      // Failures are isolated per message so that one bad row does not stop the
+      // remaining due messages from being delivered in this run.
+      try {
+        // Mark as sent first to avoid double-sending
+        const [updated] = await db
+          .update(schema.scheduledMessages)
+          .set({ sent: true })
+          .where(
+            and(
+              eq(schema.scheduledMessages.id, scheduled.id),
+              eq(schema.scheduledMessages.sent, false)
+            )
+          )
+          .returning();
+
+        if (!updated) continue;
+
+        const [user] = await db.select().from(schema.users).where(eq(schema.users.id, scheduled.userId));
+        if (!user) continue;
+
+        const [message] = await db
+          .insert(schema.messages)
+          .values({ roomId: scheduled.roomId, userId: scheduled.userId, content: scheduled.content })
+          .returning();
+
+        // Same payload shape as the live `send_message` broadcast, including
+        // `replyCount`, so clients receive a consistent message object.
+        const fullMessage = {
+          ...message,
+          userName: user.name,
+          readBy: [] as { userId: number; userName: string }[],
+          reactions: [] as { emoji: string; userId: number; userName: string }[],
+          editedAt: null as Date | null,
+          replyCount: 0,
+        };
+
+        io.to(`room:${scheduled.roomId}`).emit('message', fullMessage);
+
+        // Notify members not in the room
+        const activeSocketIds = io.sockets.adapter.rooms.get(`room:${scheduled.roomId}`) ?? new Set<string>();
+        const members = await db.select().from(schema.roomMembers).where(eq(schema.roomMembers.roomId, scheduled.roomId));
+        for (const member of members) {
+          if (member.userId === scheduled.userId) continue;
+          const memberSocketId = userSockets.get(member.userId);
+          if (memberSocketId && !activeSocketIds.has(memberSocketId)) {
+            io.to(memberSocketId).emit('message', fullMessage);
+          }
+        }
+
+        // Notify the author that their scheduled message was sent
+        const authorSocketId = userSockets.get(scheduled.userId);
+        if (authorSocketId) {
+          io.to(authorSocketId).emit('scheduled_message_sent', { id: scheduled.id });
+        }
+      } catch (e) {
+        console.error('Scheduled message job error:', e);
+      }
+    }
+  } catch (e) {
+    console.error('Scheduled message job error:', e);
+  }
+}, 10000);
+
+// Background job: delete expired ephemeral messages every 5 seconds
+setInterval(async () => {
+  // Deletes expired ephemeral messages and tells each room which ones vanished.
+  //
+  // The rows are removed with a single DELETE ... RETURNING so that the set
+  // being announced is exactly the set that was deleted. Selecting with one
+  // "now" and deleting with a later "now" would remove messages that expired in
+  // between without ever emitting `message_expired` for them.
+  try {
+    const expired = await db
+      .delete(schema.messages)
+      .where(and(isNotNull(schema.messages.expiresAt), lte(schema.messages.expiresAt!, new Date())))
+      .returning({ id: schema.messages.id, roomId: schema.messages.roomId });
+
+    // Notify the room of every message that was just removed.
+    for (const msg of expired) {
+      io.to(`room:${msg.roomId}`).emit('message_expired', { messageId: msg.id, roomId: msg.roomId });
+    }
+  } catch (e) {
+    console.error('Ephemeral cleanup job error:', e);
+  }
+}, 5000);
+
+// ─── Draft endpoints ──────────────────────────────────────────────────────────
+
+app.get('/api/drafts', async (req, res) => {
+  // Returns every saved draft row for the user given by the `userId` query
+  // parameter.
+  const userId = Number.parseInt(req.query.userId as string, 10);
+  if (!userId) return res.status(400).json({ error: 'userId required' });
+
+  // Like the other routes in this file, report a failed query as a 500 rather
+  // than letting the rejected promise escape the handler.
+  try {
+    const rows = await db.select().from(schema.drafts).where(eq(schema.drafts.userId, userId));
+    res.json(rows);
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to get drafts' });
+  }
+});
+
+app.put('/api/drafts', async (req, res) => {
+  // Creates or overwrites the draft for one (userId, roomId) pair and responds
+  // with the stored row. Missing content is stored as an empty string.
+  const { userId, roomId, content } = (req.body ?? {}) as { userId: number; roomId: number; content: string };
+  if (!userId || !roomId) return res.status(400).json({ error: 'userId and roomId required' });
+
+  // Like the other routes in this file, report a failed write as a 500 rather
+  // than letting the rejected promise escape the handler.
+  try {
+    const now = new Date();
+    const [draft] = await db
+      .insert(schema.drafts)
+      .values({ userId, roomId, content: content ?? '', updatedAt: now })
+      .onConflictDoUpdate({
+        target: [schema.drafts.userId, schema.drafts.roomId],
+        set: { content: content ?? '', updatedAt: now },
+      })
+      .returning();
+    res.json(draft);
+  } catch (e) {
+    console.error(e);
+    res.status(500).json({ error: 'Failed to save draft' });
+  }
+});
+
+// ─── Socket.io ────────────────────────────────────────────────────────────────
+
+io.on('connection', (socket) => {
+  console.log('Client connected:', socket.id);
+
+  socket.on('register', async ({ userId, userName }: { userId: number; userName: string }) => {
+    // Binds this socket to a user: records the mapping in both lookup tables,
+    // marks the user online in the database, then announces it to all clients.
+    // NOTE(review): `userId` and `userName` are taken from the client payload
+    // as-is; no authentication is visible in this handler — confirm that is
+    // intended.
+    connectedUsers.set(socket.id, { id: userId, name: userName });
+    userSockets.set(userId, socket.id);
+
+    try {
+      await db
+        .update(schema.users)
+        .set({ online: true, status: 'online', lastSeen: new Date() })
+        .where(eq(schema.users.id, userId));
+
+      io.emit('user_status', { userId, online: true, name: userName, status: 'online' });
+    } catch (e) {
+      // Nothing upstream awaits this async listener, so a database failure
+      // must be handled here or it becomes an unhandled promise rejection.
+      console.error('register failed:', e);
+    }
+  });
+
+  socket.on('set_status', async ({ status }: { status: string }) => {
+    // Stores the sender's new presence status and broadcasts it to all clients.
+    // Ignored for unregistered sockets and for values not in VALID_STATUSES.
+    const user = connectedUsers.get(socket.id);
+    if (!user) return;
+    const VALID_STATUSES = ['online', 'away', 'dnd', 'invisible'];
+    if (!VALID_STATUSES.includes(status)) return;
+
+    try {
+      const statusNow = new Date();
+      const [updated] = await db
+        .update(schema.users)
+        .set({ status, lastSeen: statusNow })
+        .where(eq(schema.users.id, user.id))
+        .returning();
+
+      // No row updated means the user no longer exists; nothing to announce.
+      if (!updated) return;
+
+      // Invisible users appear offline to others
+      const broadcastStatus = status === 'invisible' ? 'offline' : status;
+      io.emit('user_status', {
+        userId: user.id,
+        name: user.name,
+        online: broadcastStatus !== 'offline',
+        status: broadcastStatus,
+        lastSeen: statusNow,
+      });
+    } catch (e) {
+      // Nothing upstream awaits this async listener, so a database failure
+      // must be handled here or it becomes an unhandled promise rejection.
+      console.error('set_status failed:', e);
+    }
+  });
+
+  socket.on('join_room', (payload: { roomId: number }) => {
+    // Subscribe this socket to the `room:<id>` broadcast channel.
+    const channel = `room:${payload.roomId}`;
+    socket.join(channel);
+  });
+
+  socket.on('leave_room', (payload: { roomId: number }) => {
+    // Unsubscribe from the room channel, then clear the sender's typing
+    // indicator for that room when the socket belongs to a registered user.
+    const { roomId } = payload;
+    socket.leave(`room:${roomId}`);
+
+    const leaver = connectedUsers.get(socket.id);
+    if (!leaver) return;
+    stopTyping(leaver.id, leaver.name, roomId);
+  });
+
+  socket.on('join_thread', (payload: { parentMessageId: number }) => {
+    // Subscribe this socket to the `thread:<parentMessageId>` channel.
+    const channel = `thread:${payload.parentMessageId}`;
+    socket.join(channel);
+  });
+
+  socket.on('leave_thread', (payload: { parentMessageId: number }) => {
+    // Unsubscribe this socket from the `thread:<parentMessageId>` channel.
+    const channel = `thread:${payload.parentMessageId}`;
+    socket.leave(channel);
+  });
+
+  socket.on(
+    'send_message',
+    async ({ roomId, content, expiresInMs, parentMessageId }: { roomId: number; content: string; expiresInMs?: number; parentMessageId?: number }) => {
+      // Validates, stores and broadcasts one chat message, or a thread reply when
+      // `parentMessageId` is set. Only the membership check, the ban check and
+      // the insert are awaited; the lastSeen update and the notification fan-out
+      // run in the background so they do not delay the broadcast.
+      const user = connectedUsers.get(socket.id);
+      if (!user) return;
+
+      // Rate limit: 500ms between messages
+      const now = Date.now();
+      const last = lastMessageTime.get(user.id) ?? 0;
+      if (now - last < 500) return;
+      lastMessageTime.set(user.id, now);
+
+      // Silently drop empty or over-long (more than 2000 characters) content.
+      if (!content?.trim() || content.trim().length > 2000) return;
+
+      try {
+        // Membership check (blocking)
+        const [membership] = await db
+          .select()
+          .from(schema.roomMembers)
+          .where(and(eq(schema.roomMembers.userId, user.id), eq(schema.roomMembers.roomId, roomId)));
+        if (!membership) return;
+
+        // Banned check (blocking)
+        const [banned] = await db
+          .select()
+          .from(schema.bannedUsers)
+          .where(and(eq(schema.bannedUsers.userId, user.id), eq(schema.bannedUsers.roomId, roomId)));
+        if (banned) return;
+
+        // A positive `expiresInMs` makes the message ephemeral.
+        const expiresAt = expiresInMs && expiresInMs > 0 ? new Date(Date.now() + expiresInMs) : null;
+
+        // Insert message (blocking — need the result)
+        const [message] = await db
+          .insert(schema.messages)
+          .values({
+            roomId,
+            userId: user.id,
+            content: content.trim(),
+            ...(expiresAt ? { expiresAt } : {}),
+            ...(parentMessageId ? { parentMessageId } : {}),
+          })
+          .returning();
+
+        const fullMessage = {
+          ...message,
+          userName: user.name,
+          readBy: [] as { userId: number; userName: string }[],
+          reactions: [] as { emoji: string; userId: number; userName: string }[],
+          editedAt: null as Date | null,
+          replyCount: 0,
+        };
+
+        // Emit immediately: replies go to the thread channel, top-level messages
+        // go to the room channel.
+        if (parentMessageId) {
+          io.to(`thread:${parentMessageId}`).emit('thread_reply', fullMessage);
+        } else {
+          io.to(`room:${roomId}`).emit('message', fullMessage);
+        }
+
+        // Non-blocking: lastSeen update (don't await). Still best-effort, but a
+        // failure is logged instead of being swallowed.
+        db.update(schema.users)
+          .set({ lastSeen: new Date() })
+          .where(eq(schema.users.id, user.id))
+          .catch((e) => console.error('send_message: lastSeen update failed:', e));
+
+        if (!parentMessageId) {
+          // Non-blocking: deliver the message directly to members whose socket is
+          // registered but not currently joined to the room channel.
+          db.select().from(schema.roomMembers).where(eq(schema.roomMembers.roomId, roomId)).then(members => {
+            const activeSocketIds = io.sockets.adapter.rooms.get(`room:${roomId}`) ?? new Set<string>();
+            for (const member of members) {
+              if (member.userId === user.id) continue;
+              const memberSocketId = userSockets.get(member.userId);
+              if (memberSocketId && !activeSocketIds.has(memberSocketId)) {
+                io.to(memberSocketId).emit('message', fullMessage);
+              }
+            }
+          }).catch((e) => console.error('send_message: notification fan-out failed:', e));
+          stopTyping(user.id, user.name, roomId);
+          recordRoomMessage(roomId);
+        }
+      } catch (e) {
+        // Nothing upstream awaits this async listener, so a database failure
+        // must be handled here or it becomes an unhandled promise rejection.
+        console.error('send_message failed:', e);
+      }
+    }
+  );
+
+  socket.on('typing_start', ({ roomId }: { roomId: number }) => {
+    // Tells the other sockets in the room that the sender is typing and
+    // (re)arms a 3-second timer that clears the indicator automatically.
+    const sender = connectedUsers.get(socket.id);
+    if (!sender) return;
+
+    // Look up, or lazily create, this room's typing map.
+    let typists = typingState.get(roomId);
+    if (!typists) {
+      typists = new Map();
+      typingState.set(roomId, typists);
+    }
+
+    // Cancel the timer left over from an earlier typing_start, if any.
+    const previous = typists.get(sender.id);
+    if (previous) clearTimeout(previous.timer);
+
+    socket.to(`room:${roomId}`).emit('typing', { userId: sender.id, userName: sender.name, typing: true });
+
+    const timer = setTimeout(() => stopTyping(sender.id, sender.name, roomId), 3000);
+    typists.set(sender.id, { timer, userName: sender.name });
+  });
+
+  socket.on('typing_stop', ({ roomId }: { roomId: number }) => {
+    // Explicit "stopped typing" signal; ignored for unregistered sockets.
+    const sender = connectedUsers.get(socket.id);
+    if (sender) stopTyping(sender.id, sender.name, roomId);
+  });
+
+  socket.on('save_draft', async ({ roomId, content }: { roomId: number; content: string }) => {
+    // Upserts the sender's draft for a room (one row per user/room pair) and
+    // forwards the stored content to the user's registered socket when that is
+    // a different socket from this one.
+    const user = connectedUsers.get(socket.id);
+    if (!user) return;
+
+    try {
+      const now = new Date();
+      const [draft] = await db
+        .insert(schema.drafts)
+        .values({ userId: user.id, roomId, content: content ?? '', updatedAt: now })
+        .onConflictDoUpdate({
+          target: [schema.drafts.userId, schema.drafts.roomId],
+          set: { content: content ?? '', updatedAt: now },
+        })
+        .returning();
+
+      // Multi-device sync. `userSockets` holds a single socket id per user, so
+      // this reaches at most one other socket: the one that registered last.
+      const otherSocketId = userSockets.get(user.id);
+      if (otherSocketId && otherSocketId !== socket.id) {
+        io.to(otherSocketId).emit('draft_updated', { roomId, content: draft.content });
+      }
+    } catch (e) {
+      // Nothing upstream awaits this async listener, so a database failure
+      // must be handled here or it becomes an unhandled promise rejection.
+      console.error('save_draft failed:', e);
+    }
+  });
+
+  socket.on('mark_read', async ({ messageId }: { messageId: number }) => {
+    // Records that the sender has read one message and, only when the receipt
+    // is new, broadcasts `read_receipt` to the message's room.
+    const user = connectedUsers.get(socket.id);
+    if (!user) return;
+
+    try {
+      const inserted = await db
+        .insert(schema.readReceipts)
+        .values({ userId: user.id, messageId })
+        .onConflictDoNothing()
+        .returning();
+
+      // An empty result means the receipt already existed; nothing to announce.
+      if (inserted.length === 0) return;
+
+      const [message] = await db
+        .select()
+        .from(schema.messages)
+        .where(eq(schema.messages.id, messageId));
+
+      if (message) {
+        io.to(`room:${message.roomId}`).emit('read_receipt', {
+          messageId,
+          userId: user.id,
+          userName: user.name,
+        });
+      }
+    } catch (e) {
+      // Nothing upstream awaits this async listener, so a database failure
+      // must be handled here or it becomes an unhandled promise rejection.
+      console.error('mark_read failed:', e);
+    }
+  });
+
+  socket.on('disconnect', async () => {
+    // Tears down per-socket state. Presence (database row, `user_status`
+    // broadcast, typing indicators) is reset only when this socket is still the
+    // one registered for the user.
+    const user = connectedUsers.get(socket.id);
+    if (user) {
+      connectedUsers.delete(socket.id);
+
+      // `register` overwrites userSockets[userId] with the newest socket id. If
+      // the user has already registered again from another socket (for example
+      // after a page reload), this stale disconnect must not delete that newer
+      // mapping or mark a still-connected user offline.
+      if (userSockets.get(user.id) === socket.id) {
+        userSockets.delete(user.id);
+
+        try {
+          const now = new Date();
+          await db
+            .update(schema.users)
+            .set({ online: false, status: 'offline', lastSeen: now })
+            .where(eq(schema.users.id, user.id));
+
+          io.emit('user_status', { userId: user.id, online: false, name: user.name, status: 'offline', lastSeen: now });
+        } catch (e) {
+          // Log and continue so the typing cleanup below still runs.
+          console.error('disconnect: presence update failed:', e);
+        }
+
+        // Clear all typing for this user
+        typingState.forEach((roomTyping, roomId) => {
+          if (roomTyping.has(user.id)) {
+            clearTimeout(roomTyping.get(user.id)!.timer);
+            roomTyping.delete(user.id);
+            io.to(`room:${roomId}`).emit('typing', { userId: user.id, userName: user.name, typing: false });
+          }
+        });
+      }
+    }
+    console.log('Client disconnected:', socket.id);
+  });
+});
+
+/**
+ * Clears a user's typing state for a room and notifies the room.
+ *
+ * Cancels and removes the user's pending auto-stop timer when one exists, then
+ * emits `typing` with `typing: false` to `room:<roomId>` whether or not the
+ * user was recorded as typing.
+ */
+function stopTyping(userId: number, userName: string, roomId: number) {
+  const typists = typingState.get(roomId);
+  const entry = typists?.get(userId);
+  if (typists && entry) {
+    clearTimeout(entry.timer);
+    typists.delete(userId);
+  }
+
+  const notice = { userId, userName, typing: false };
+  io.to(`room:${roomId}`).emit('typing', notice);
+}
+
+// Listening port: taken from the PORT environment variable, defaulting to 6001.
+// `||` (rather than `??`) also falls back when PORT is set but empty, which
+// would otherwise parse to NaN; the explicit radix forces decimal parsing.
+const PORT = Number.parseInt(process.env.PORT || '6001', 10);
+httpServer.listen(PORT, () => {
+  console.log(`Server running on http://localhost:${PORT}`);
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/stdb-index-optimized.ts b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/stdb-index-optimized.ts
new file mode 100644
index 00000000000..9510387b2ba
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/stdb-index-optimized.ts
@@ -0,0 +1,657 @@
+﻿import spacetimedb from './schema';
+import { t, SenderError } from 'spacetimedb/server';
+import { ScheduleAt, Timestamp } from 'spacetimedb';
+export { default } from './schema';
+export { sendScheduledMessage, deleteExpiredMessage } from './schema';
+
+// Lifecycle hooks
+// onConnect: mark a known user as present again, or create an anonymous user row.
+export const onConnect = spacetimedb.clientConnected((ctx) => {
+  const known = ctx.db.user.identity.find(ctx.sender);
+  if (!known) {
+    // Unknown identity: temporary name built from the first 6 hex characters of the identity.
+    const prefix = ctx.sender.toHexString().slice(0, 6);
+    ctx.db.user.insert({
+      identity: ctx.sender,
+      name: `Anon_${prefix}`,
+      status: 'online',
+      lastActiveAt: null,
+      createdAt: ctx.timestamp,
+      isAnonymous: true,
+    });
+    return;
+  }
+  // Known user: a stored 'invisible' status is kept, any other status becomes 'online'.
+  const status = known.status !== 'invisible' ? 'online' : 'invisible';
+  ctx.db.user.identity.update({ ...known, status, lastActiveAt: null });
+});
+
+export const onDisconnect = spacetimedb.clientDisconnected((ctx) => {
+  const existing = ctx.db.user.identity.find(ctx.sender);
+  if (!existing) return;
+  // Keep an explicit 'invisible' choice across reconnects: onConnect only restores it
+  // if it is still stored, so it must not be overwritten with 'offline' here.
+  const status = existing.status === 'invisible' ? 'invisible' : 'offline';
+  ctx.db.user.identity.update({ ...existing, status, lastActiveAt: ctx.timestamp });
+  // Clear typing indicators for this user (matching rows are collected first, then deleted).
+  for (const ti of [...ctx.db.typingIndicator.userIdentity.filter(ctx.sender)]) ctx.db.typingIndicator.id.delete(ti.id);
+});
+
+// Set or update display name (also marks user as registered, no longer anonymous)
+export const setName = spacetimedb.reducer(
+  { name: t.string() },
+  (ctx, { name }) => {
+    // The name is trimmed first; the result must be 1 to 32 characters long.
+    const displayName = name.trim();
+    if (displayName.length === 0) throw new SenderError('Name cannot be empty');
+    if (displayName.length > 32) throw new SenderError('Name too long (max 32 chars)');
+    const current = ctx.db.user.identity.find(ctx.sender);
+    if (!current) {
+      ctx.db.user.insert({ identity: ctx.sender, name: displayName, status: 'online', lastActiveAt: null, createdAt: ctx.timestamp, isAnonymous: false });
+      return;
+    }
+    ctx.db.user.identity.update({ ...current, name: displayName, isAnonymous: false });
+  }
+);
+
+// Set user presence status (accepted values: online, away, dnd, invisible)
+export const setStatus = spacetimedb.reducer(
+  { status: t.string() },
+  (ctx, { status }) => {
+    const accepted = status === 'online' || status === 'away' || status === 'dnd' || status === 'invisible';
+    if (!accepted) throw new SenderError('Invalid status');
+    const me = ctx.db.user.identity.find(ctx.sender);
+    if (!me) throw new SenderError('User not found');
+    // Every status change also stamps lastActiveAt with the reducer timestamp.
+    ctx.db.user.identity.update({ ...me, status, lastActiveAt: ctx.timestamp });
+  }
+);
+
+// Create a room; the creator becomes its first member and admin
+export const createRoom = spacetimedb.reducer(
+  { name: t.string(), isPrivate: t.bool() },
+  (ctx, { name, isPrivate }) => {
+    const creator = ctx.sender;
+    if (!ctx.db.user.identity.find(creator)) throw new SenderError('Set your name first');
+
+    const roomName = name.trim();
+    if (roomName.length === 0) throw new SenderError('Room name cannot be empty');
+    if (roomName.length > 64) throw new SenderError('Room name too long (max 64 chars)');
+    // Check for duplicate name (lookup goes through the room name index)
+    if (ctx.db.room.name.find(roomName)) throw new SenderError('Room already exists');
+
+    // Rows are inserted with id 0n; the id on the returned row (presumably auto-assigned) is used below.
+    const created = ctx.db.room.insert({ id: 0n, name: roomName, createdBy: creator, createdAt: ctx.timestamp, isPrivate, isDm: false });
+    ctx.db.roomMember.insert({ id: 0n, roomId: created.id, userIdentity: creator, joinedAt: ctx.timestamp });
+    ctx.db.roomAdmin.insert({ id: 0n, roomId: created.id, userIdentity: creator });
+  }
+);
+
+// Join a room (public only — private rooms require invitation via acceptInvitation)
+// Banned users and users who are already members are rejected with an error.
+export const joinRoom = spacetimedb.reducer(
+  { roomId: t.u64() },
+  (ctx, { roomId }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) throw new SenderError('Set your name first');
+    const target = ctx.db.room.id.find(roomId);
+    if (!target) throw new SenderError('Room not found');
+    if (target.isPrivate) throw new SenderError('This is a private room. You must be invited.');
+
+    // Rows are matched to the caller by comparing identity hex strings.
+    const me = ctx.sender.toHexString();
+    const isMine = (row: { userIdentity: { toHexString(): string } }) => row.userIdentity.toHexString() === me;
+
+    // Check if banned
+    const banned = [...ctx.db.bannedUser.roomId.filter(roomId)].some(isMine);
+    if (banned) throw new SenderError('You are banned from this room');
+
+    // Check if already a member
+    const joined = [...ctx.db.roomMember.roomId.filter(roomId)].some(isMine);
+    if (joined) throw new SenderError('Already a member');
+
+    ctx.db.roomMember.insert({ id: 0n, roomId, userIdentity: ctx.sender, joinedAt: ctx.timestamp });
+  }
+);
+
+// Leave a room
+// Throws 'Not a member of this room' when the caller has no membership row in it.
+export const leaveRoom = spacetimedb.reducer(
+  { roomId: t.u64() },
+  (ctx, { roomId }) => {
+    const me = ctx.sender.toHexString();
+
+    // Only the first matching membership row is removed.
+    const membership = [...ctx.db.roomMember.roomId.filter(roomId)]
+      .find((m) => m.userIdentity.toHexString() === me);
+    if (!membership) throw new SenderError('Not a member of this room');
+    ctx.db.roomMember.id.delete(membership.id);
+
+    // Clear typing indicator if any
+    const myIndicators = [...ctx.db.typingIndicator.roomId.filter(roomId)]
+      .filter((ti) => ti.userIdentity.toHexString() === me);
+    for (const ti of myIndicators) ctx.db.typingIndicator.id.delete(ti.id);
+  }
+);
+
+// Send a message (OPTIMIZED — same features, better implementation:
+// use identity index for lookups, no spread-to-array, no toHexString allocs)
+export const sendMessage = spacetimedb.reducer(
+  { roomId: t.u64(), text: t.string() },
+  (ctx, { roomId, text }) => {
+    const author = ctx.sender;
+    if (!ctx.db.user.identity.find(author)) throw new SenderError('Set your name first');
+    if (!ctx.db.room.id.find(roomId)) throw new SenderError('Room not found');
+
+    // Membership: walk only the sender's own rows and match the room by bigint id.
+    let joined = false;
+    for (const membership of ctx.db.roomMember.userIdentity.filter(author)) {
+      if (membership.roomId === roomId) { joined = true; break; }
+    }
+    if (!joined) throw new SenderError('Not a member of this room');
+
+    const body = text.trim();
+    if (body.length === 0) throw new SenderError('Message cannot be empty');
+    if (body.length > 2000) throw new SenderError('Message too long (max 2000 chars)');
+
+    const sent = ctx.db.message.insert({ id: 0n, roomId, senderIdentity: author, text: body, sentAt: ctx.timestamp, expiresAt: null, editedAt: null, parentMessageId: null });
+
+    // Read receipt: point the sender's receipt for this room at the new message (upsert).
+    let receipt: any = undefined;
+    for (const candidate of ctx.db.readReceipt.userIdentity.filter(author)) {
+      if (candidate.roomId === roomId) { receipt = candidate; break; }
+    }
+    if (!receipt) {
+      ctx.db.readReceipt.insert({ id: 0n, roomId, userIdentity: author, lastReadMessageId: sent.id, updatedAt: ctx.timestamp });
+    } else {
+      ctx.db.readReceipt.id.update({ ...receipt, lastReadMessageId: sent.id, updatedAt: ctx.timestamp });
+    }
+
+    // Typing indicator: remove the sender's first indicator row for this room, if any.
+    for (const indicator of ctx.db.typingIndicator.userIdentity.filter(author)) {
+      if (indicator.roomId !== roomId) continue;
+      ctx.db.typingIndicator.id.delete(indicator.id);
+      break;
+    }
+  }
+);
+
+// Send an ephemeral message that auto-deletes after durationSecs seconds
+// durationSecs must be between 1 and 86400 (one day); other values are rejected.
+export const sendEphemeralMessage = spacetimedb.reducer(
+  { roomId: t.u64(), text: t.string(), durationSecs: t.u32() },
+  (ctx, { roomId, text, durationSecs }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) throw new SenderError('Set your name first');
+    if (!ctx.db.room.id.find(roomId)) throw new SenderError('Room not found');
+
+    // Rows belonging to the caller are recognised by the hex form of their identity.
+    const me = ctx.sender.toHexString();
+    const mine = (row: { userIdentity: { toHexString(): string } }) => row.userIdentity.toHexString() === me;
+
+    const joined = [...ctx.db.roomMember.roomId.filter(roomId)].some(mine);
+    if (!joined) throw new SenderError('Not a member of this room');
+
+    const body = text.trim();
+    if (body.length === 0) throw new SenderError('Message cannot be empty');
+    if (body.length > 2000) throw new SenderError('Message too long (max 2000 chars)');
+    if (durationSecs < 1 || durationSecs > 86400) throw new SenderError('Invalid duration');
+
+    // Expiry instant in microseconds since the Unix epoch: now + durationSecs * 1_000_000.
+    const expiresAtMicros = ctx.timestamp.microsSinceUnixEpoch + BigInt(durationSecs) * 1_000_000n;
+
+    const sent = ctx.db.message.insert({
+      id: 0n,
+      roomId,
+      senderIdentity: ctx.sender,
+      text: body,
+      sentAt: ctx.timestamp,
+      expiresAt: new Timestamp(expiresAtMicros),
+      editedAt: null,
+      parentMessageId: null,
+    });
+
+    // Update sender's read receipt so it points at the new message (insert if missing).
+    const receipt = [...ctx.db.readReceipt.roomId.filter(roomId)].find(mine);
+    if (!receipt) {
+      ctx.db.readReceipt.insert({
+        id: 0n,
+        roomId,
+        userIdentity: ctx.sender,
+        lastReadMessageId: sent.id,
+        updatedAt: ctx.timestamp,
+      });
+    } else {
+      ctx.db.readReceipt.id.update({ ...receipt, lastReadMessageId: sent.id, updatedAt: ctx.timestamp });
+    }
+
+    // Clear typing indicator: every indicator row the sender has in this room.
+    const indicators = [...ctx.db.typingIndicator.roomId.filter(roomId)].filter(mine);
+    for (const indicator of indicators) {
+      ctx.db.typingIndicator.id.delete(indicator.id);
+    }
+
+    // Schedule deletion: the messageExpiry row is scheduled at the same expiry instant
+    // that was stored on the message.
+    ctx.db.messageExpiry.insert({
+      scheduledId: 0n,
+      scheduledAt: ScheduleAt.time(expiresAtMicros),
+      messageId: sent.id,
+    });
+  }
+);
+
+// Edit a message and save previous version to history
+// (only the original sender may edit; an unchanged text is a no-op).
+export const editMessage = spacetimedb.reducer(
+  { messageId: t.u64(), newText: t.string() },
+  (ctx, { messageId, newText }) => {
+    const original = ctx.db.message.id.find(messageId);
+    if (!original) throw new SenderError('Message not found');
+
+    const isAuthor = original.senderIdentity.toHexString() === ctx.sender.toHexString();
+    if (!isAuthor) throw new SenderError('Can only edit your own messages');
+
+    const revised = newText.trim();
+    if (revised.length === 0) throw new SenderError('Message cannot be empty');
+    if (revised.length > 2000) throw new SenderError('Message too long (max 2000 chars)');
+    if (revised === original.text) return; // No change
+
+    // History row first (keeps the text being replaced), then the message itself.
+    const editedAt = ctx.timestamp;
+    ctx.db.messageEdit.insert({ id: 0n, messageId, previousText: original.text, editedAt });
+    ctx.db.message.id.update({ ...original, text: revised, editedAt });
+  }
+);
+
+// Set typing indicator
+// isTyping=true upserts the caller's indicator row for the room (refreshing updatedAt);
+// isTyping=false removes it. Unknown users, unknown rooms and non-members are ignored
+// silently (no error is raised).
+export const setTyping = spacetimedb.reducer(
+  { roomId: t.u64(), isTyping: t.bool() },
+  (ctx, { roomId, isTyping }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) return;
+    if (!ctx.db.room.id.find(roomId)) return;
+
+    const me = ctx.sender.toHexString();
+    const mine = (row: { userIdentity: { toHexString(): string } }) => row.userIdentity.toHexString() === me;
+    const found = [...ctx.db.typingIndicator.roomId.filter(roomId)].find(mine);
+
+    if (!isTyping) {
+      // Clearing is always allowed so a stale row can be removed after leaving the room.
+      if (found) ctx.db.typingIndicator.id.delete(found.id);
+      return;
+    }
+
+    // Only room members may announce typing, matching the membership check in sendMessage.
+    if (![...ctx.db.roomMember.roomId.filter(roomId)].some(mine)) return;
+    if (found) {
+      ctx.db.typingIndicator.id.update({ ...found, updatedAt: ctx.timestamp });
+    } else {
+      ctx.db.typingIndicator.insert({ id: 0n, roomId, userIdentity: ctx.sender, updatedAt: ctx.timestamp });
+    }
+  }
+);
+
+// Mark messages as read up to a given message ID
+// An existing receipt only moves forward: a smaller or equal messageId is ignored.
+export const markRead = spacetimedb.reducer(
+  { roomId: t.u64(), messageId: t.u64() },
+  (ctx, { roomId, messageId }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) return;
+
+    // The caller's receipt row for this room, if one exists.
+    const me = ctx.sender.toHexString();
+    const receipt = [...ctx.db.readReceipt.roomId.filter(roomId)]
+      .find((r) => r.userIdentity.toHexString() === me);
+
+    if (!receipt) {
+      // No receipt yet for this user/room pair: create it.
+      ctx.db.readReceipt.insert({ id: 0n, roomId, userIdentity: ctx.sender, lastReadMessageId: messageId, updatedAt: ctx.timestamp });
+      return;
+    }
+
+    // Never move the read marker backwards.
+    if (messageId <= receipt.lastReadMessageId) return;
+    ctx.db.readReceipt.id.update({ ...receipt, lastReadMessageId: messageId, updatedAt: ctx.timestamp });
+  }
+);
+
+// Schedule a message to be sent at a future time
+// scheduledAtMicros is compared against ctx.timestamp.microsSinceUnixEpoch, i.e. it is
+// an absolute instant in microseconds since the Unix epoch.
+export const scheduleMessage = spacetimedb.reducer(
+  { roomId: t.u64(), text: t.string(), scheduledAtMicros: t.u64() },
+  (ctx, { roomId, text, scheduledAtMicros }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) throw new SenderError('Set your name first');
+    if (!ctx.db.room.id.find(roomId)) throw new SenderError('Room not found');
+
+    // Caller must hold a membership row for the room.
+    const me = ctx.sender.toHexString();
+    const joined = [...ctx.db.roomMember.roomId.filter(roomId)]
+      .some((m) => m.userIdentity.toHexString() === me);
+    if (!joined) throw new SenderError('Not a member of this room');
+
+    const body = text.trim();
+    if (body.length === 0) throw new SenderError('Message cannot be empty');
+    if (body.length > 2000) throw new SenderError('Message too long (max 2000 chars)');
+
+    // "Now" (the reducer timestamp) is rejected as well: the time must be strictly later.
+    const nowMicros = ctx.timestamp.microsSinceUnixEpoch;
+    if (scheduledAtMicros <= nowMicros) throw new SenderError('Scheduled time must be in the future');
+
+    ctx.db.scheduledMessage.insert({
+      scheduledId: 0n,
+      scheduledAt: ScheduleAt.time(scheduledAtMicros),
+      roomId,
+      senderIdentity: ctx.sender,
+      text: body,
+    });
+  }
+);
+
+// Cancel a pending scheduled message (only its sender may cancel it)
+export const cancelScheduledMessage = spacetimedb.reducer(
+  { scheduledId: t.u64() },
+  (ctx, { scheduledId }) => {
+    const pending = ctx.db.scheduledMessage.scheduledId.find(scheduledId);
+    if (!pending) throw new SenderError('Scheduled message not found');
+
+    const ownedByCaller = pending.senderIdentity.toHexString() === ctx.sender.toHexString();
+    if (!ownedByCaller) throw new SenderError('Not your scheduled message');
+    ctx.db.scheduledMessage.scheduledId.delete(scheduledId);
+  }
+);
+
+// Kick a user from a room (removes them and bans from rejoining)
+//   roomId — room to remove the target from
+//   target — identity of the user being kicked
+// Errors: 'Room not found', 'Not authorized' (caller is not an admin),
+//         'Cannot kick an admin' (target is an admin).
+// "Admin" means: has a roomAdmin row for the room, or is the room's creator.
+export const kickUser = spacetimedb.reducer(
+  { roomId: t.u64(), target: t.identity() },
+  (ctx, { roomId, target }) => {
+    const room = ctx.db.room.id.find(roomId);
+    if (!room) throw new SenderError('Room not found');
+
+    // Identities are compared through their hex-string form throughout.
+    const callerHex = ctx.sender.toHexString();
+    const targetHex = target.toHexString();
+
+    // True when `hex` has a roomAdmin row for this room or created the room.
+    const isAdmin = (hex: string): boolean => {
+      const hasAdminRow = [...ctx.db.roomAdmin.roomId.filter(roomId)]
+        .some((a) => a.userIdentity.toHexString() === hex);
+      return hasAdminRow || room.createdBy.toHexString() === hex;
+    };
+
+    if (!isAdmin(callerHex)) throw new SenderError('Not authorized');
+    if (isAdmin(targetHex)) throw new SenderError('Cannot kick an admin');
+
+    // Remove from room membership (only the first matching row is deleted)
+    const membership = [...ctx.db.roomMember.roomId.filter(roomId)]
+      .find((m) => m.userIdentity.toHexString() === targetHex);
+    if (membership) {
+      ctx.db.roomMember.id.delete(membership.id);
+    }
+
+    // Clear typing indicators (every row the target has in this room)
+    const indicators = [...ctx.db.typingIndicator.roomId.filter(roomId)]
+      .filter((ti) => ti.userIdentity.toHexString() === targetHex);
+    for (const ti of indicators) {
+      ctx.db.typingIndicator.id.delete(ti.id);
+    }
+
+    // Add to banned list (prevent rejoin); an existing ban row is not duplicated.
+    // The ban is recorded even when the target had no membership row.
+    const alreadyBanned = [...ctx.db.bannedUser.roomId.filter(roomId)]
+      .some((b) => b.userIdentity.toHexString() === targetHex);
+    if (!alreadyBanned) {
+      ctx.db.bannedUser.insert({
+        id: 0n,
+        roomId,
+        userIdentity: target,
+      });
+    }
+  }
+);
+
+// Promote a room member to admin
+// Caller must be an admin (roomAdmin row or room creator); target must be a member.
+export const promoteUser = spacetimedb.reducer(
+  { roomId: t.u64(), target: t.identity() },
+  (ctx, { roomId, target }) => {
+    const room = ctx.db.room.id.find(roomId);
+    if (!room) throw new SenderError('Room not found');
+
+    const callerHex = ctx.sender.toHexString();
+    const targetHex = target.toHexString();
+    // Hex identities that currently hold a roomAdmin row for this room.
+    const adminHexes = [...ctx.db.roomAdmin.roomId.filter(roomId)]
+      .map((a) => a.userIdentity.toHexString());
+
+    // Check caller is admin
+    const callerIsAdmin = adminHexes.includes(callerHex) || room.createdBy.toHexString() === callerHex;
+    if (!callerIsAdmin) throw new SenderError('Not authorized');
+
+    // Target must be a member
+    const targetIsMember = [...ctx.db.roomMember.roomId.filter(roomId)]
+      .some((m) => m.userIdentity.toHexString() === targetHex);
+    if (!targetIsMember) throw new SenderError('User is not a member of this room');
+
+    // Promote if not already admin
+    // (only roomAdmin rows count here, so a creator without a row receives one)
+    if (!adminHexes.includes(targetHex)) {
+      ctx.db.roomAdmin.insert({
+        id: 0n,
+        roomId,
+        userIdentity: target,
+      });
+    }
+  }
+);
+
+// Reply to a message, creating a thread
+// The reply is stored as a regular message in the parent's room with parentMessageId set.
+export const replyToMessage = spacetimedb.reducer(
+  { parentMessageId: t.u64(), text: t.string() },
+  (ctx, { parentMessageId, text }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) throw new SenderError('Set your name first');
+
+    const parent = ctx.db.message.id.find(parentMessageId);
+    if (!parent) throw new SenderError('Parent message not found');
+
+    // The reply always goes to the room the parent message lives in.
+    const { roomId } = parent;
+    if (!ctx.db.room.id.find(roomId)) throw new SenderError('Room not found');
+
+    // Verify membership
+    const me = ctx.sender.toHexString();
+    const joined = [...ctx.db.roomMember.roomId.filter(roomId)]
+      .some((m) => m.userIdentity.toHexString() === me);
+    if (!joined) throw new SenderError('Not a member of this room');
+
+    const body = text.trim();
+    if (body.length === 0) throw new SenderError('Reply cannot be empty');
+    if (body.length > 2000) throw new SenderError('Reply too long (max 2000 chars)');
+
+    // Unlike sendMessage, no read receipt or typing indicator is touched here.
+    ctx.db.message.insert({
+      id: 0n,
+      roomId,
+      senderIdentity: ctx.sender,
+      text: body,
+      sentAt: ctx.timestamp,
+      expiresAt: null,
+      editedAt: null,
+      parentMessageId,
+    });
+  }
+);
+
+// Toggle a reaction on a message (add if not present, remove if already reacted with same emoji)
+// Only members of the message's room may react, mirroring sendMessage/replyToMessage.
+export const toggleReaction = spacetimedb.reducer(
+  { messageId: t.u64(), emoji: t.string() },
+  (ctx, { messageId, emoji }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) throw new SenderError('Set your name first');
+    const msg = ctx.db.message.id.find(messageId);
+    if (!msg) throw new SenderError('Message not found');
+
+    const validEmojis = ['👍', '❤️', '😂', '😮', '😢'];
+    if (!validEmojis.includes(emoji)) throw new SenderError('Invalid emoji');
+
+    // Membership gate: otherwise any user could react in rooms they never joined (private ones too).
+    const me = ctx.sender.toHexString();
+    const isMember = [...ctx.db.roomMember.roomId.filter(msg.roomId)]
+      .some((m) => m.userIdentity.toHexString() === me);
+    if (!isMember) throw new SenderError('Not a member of this room');
+
+    // Same user + same emoji on this message → remove it; otherwise add it.
+    const existing = [...ctx.db.messageReaction.messageId.filter(messageId)]
+      .find((r) => r.userIdentity.toHexString() === me && r.emoji === emoji);
+    if (existing) {
+      ctx.db.messageReaction.id.delete(existing.id);
+    } else {
+      ctx.db.messageReaction.insert({ id: 0n, messageId, userIdentity: ctx.sender, emoji });
+    }
+  }
+);
+
+// Create or open a direct message conversation with another user
+// A DM is a private room with isDm=true whose name is derived from both identities.
+// Calling it again for an existing DM only re-adds the caller if they had left.
+export const createDm = spacetimedb.reducer(
+  { targetIdentity: t.identity() },
+  (ctx, { targetIdentity }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) throw new SenderError('Set your name first');
+    if (!ctx.db.user.identity.find(targetIdentity)) throw new SenderError('Target user not found');
+
+    const me = ctx.sender.toHexString();
+    const peer = targetIdentity.toHexString();
+    if (me === peer) throw new SenderError('Cannot DM yourself');
+
+    // Deterministic room name: the two hex identities in ascending string order,
+    // so either participant computes the same name.
+    const dmName = me < peer ? `__dm__${me}_${peer}` : `__dm__${peer}_${me}`;
+
+    const existing = ctx.db.room.name.find(dmName);
+    if (!existing) {
+      // No room yet: create it and add both participants as members.
+      const created = ctx.db.room.insert({
+        id: 0n,
+        name: dmName,
+        createdBy: ctx.sender,
+        createdAt: ctx.timestamp,
+        isPrivate: true,
+        isDm: true,
+      });
+      ctx.db.roomMember.insert({ id: 0n, roomId: created.id, userIdentity: ctx.sender, joinedAt: ctx.timestamp });
+      ctx.db.roomMember.insert({ id: 0n, roomId: created.id, userIdentity: targetIdentity, joinedAt: ctx.timestamp });
+      return;
+    }
+
+    // Room already exists: ensure caller is a member (e.g., re-opening after leave)
+    const joined = [...ctx.db.roomMember.roomId.filter(existing.id)]
+      .some((m) => m.userIdentity.toHexString() === me);
+    if (!joined) {
+      // Only the caller is re-added; the other participant's membership is left as is.
+      ctx.db.roomMember.insert({ id: 0n, roomId: existing.id, userIdentity: ctx.sender, joinedAt: ctx.timestamp });
+    }
+  }
+);
+
+// Invite a user to a private room (admin only)
+// Creates a roomInvitation row for the target; see acceptInvitation/declineInvitation.
+// Rejected when the target is unknown, already a member, or already invited.
+export const inviteUser = spacetimedb.reducer(
+  { roomId: t.u64(), targetIdentity: t.identity() },
+  (ctx, { roomId, targetIdentity }) => {
+    const room = ctx.db.room.id.find(roomId);
+    if (!room) throw new SenderError('Room not found');
+    if (!room.isPrivate) throw new SenderError('Only private rooms support invitations');
+
+    const callerHex = ctx.sender.toHexString();
+    const targetHex = targetIdentity.toHexString();
+
+    // Must be admin: a roomAdmin row for this room, or being the room's creator.
+    const hasAdminRow = [...ctx.db.roomAdmin.roomId.filter(roomId)]
+      .some((a) => a.userIdentity.toHexString() === callerHex);
+    const isCreator = room.createdBy.toHexString() === callerHex;
+    if (!hasAdminRow && !isCreator) throw new SenderError('Not authorized');
+
+    if (!ctx.db.user.identity.find(targetIdentity)) throw new SenderError('User not found');
+
+    // Check not already a member
+    const alreadyMember = [...ctx.db.roomMember.roomId.filter(roomId)]
+      .some((m) => m.userIdentity.toHexString() === targetHex);
+    if (alreadyMember) throw new SenderError('User is already a member');
+
+    // Check not already invited (invitee index lookup, matched on room id)
+    const alreadyInvited = [...ctx.db.roomInvitation.inviteeIdentity.filter(targetIdentity)]
+      .some((inv) => inv.roomId === roomId);
+    if (alreadyInvited) throw new SenderError('User already has a pending invitation');
+
+    // All checks passed: record the pending invitation.
+    ctx.db.roomInvitation.insert({ id: 0n, roomId, inviterIdentity: ctx.sender, inviteeIdentity: targetIdentity, createdAt: ctx.timestamp });
+  }
+);
+
+// Accept a room invitation — adds caller to the room (no-op insert if already a member)
+export const acceptInvitation = spacetimedb.reducer(
+  { invitationId: t.u64() },
+  (ctx, { invitationId }) => {
+    const inv = ctx.db.roomInvitation.id.find(invitationId);
+    if (!inv) throw new SenderError('Invitation not found');
+    const me = ctx.sender.toHexString();
+    if (inv.inviteeIdentity.toHexString() !== me) throw new SenderError('Not your invitation');
+    if (!ctx.db.room.id.find(inv.roomId)) throw new SenderError('Room no longer exists');
+    // Skip the insert when the caller already holds a membership row (e.g. re-added to a
+    // DM via createDm while the invitation was pending) so no duplicate row is created.
+    const alreadyMember = [...ctx.db.roomMember.roomId.filter(inv.roomId)]
+      .some((m) => m.userIdentity.toHexString() === me);
+    if (!alreadyMember) ctx.db.roomMember.insert({ id: 0n, roomId: inv.roomId, userIdentity: ctx.sender, joinedAt: ctx.timestamp });
+    ctx.db.roomInvitation.id.delete(invitationId);
+  }
+);
+
+// Decline a room invitation (only the invitee may decline; the row is simply deleted)
+export const declineInvitation = spacetimedb.reducer(
+  { invitationId: t.u64() },
+  (ctx, { invitationId }) => {
+    const invitation = ctx.db.roomInvitation.id.find(invitationId);
+    if (!invitation) throw new SenderError('Invitation not found');
+
+    const addressedToCaller = invitation.inviteeIdentity.toHexString() === ctx.sender.toHexString();
+    if (!addressedToCaller) throw new SenderError('Not your invitation');
+    ctx.db.roomInvitation.id.delete(invitationId);
+  }
+);
+
+// Save or clear a message draft for a room
+// If text is empty, the draft is deleted; otherwise it is upserted
+// Unknown users and unknown rooms are ignored silently (no error is raised).
+export const saveDraft = spacetimedb.reducer(
+  { roomId: t.u64(), text: t.string() },
+  (ctx, { roomId, text }) => {
+    if (!ctx.db.user.identity.find(ctx.sender)) return;
+    if (!ctx.db.room.id.find(roomId)) return;
+
+    // The caller's existing draft for this room, if any.
+    const me = ctx.sender.toHexString();
+    const draft = [...ctx.db.draft.roomId.filter(roomId)]
+      .find((d) => d.userIdentity.toHexString() === me);
+
+    // Note: text is stored as given (it is not trimmed), so whitespace-only drafts are kept.
+    if (text.length === 0) {
+      if (draft) ctx.db.draft.id.delete(draft.id);
+      return;
+    }
+
+    if (!draft) {
+      ctx.db.draft.insert({ id: 0n, roomId, userIdentity: ctx.sender, text, updatedAt: ctx.timestamp });
+    } else {
+      ctx.db.draft.id.update({ ...draft, text, updatedAt: ctx.timestamp });
+    }
+  }
+);
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/package-lock.json b/tools/llm-sequential-upgrade/perf-benchmark/package-lock.json
new file mode 100644
index 00000000000..ad3d7ec5763
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/package-lock.json
@@ -0,0 +1,787 @@
+{
+  "name": "perf-benchmark",
+  "version": "0.1.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "perf-benchmark",
+      "version": "0.1.0",
+      "dependencies": {
+        "socket.io-client": "^4.7.4",
+        "spacetimedb": "^2.0.0",
+        "tsx": "^4.19.0",
+        "typescript": "^5.4.0"
+      }
+    },
+    "node_modules/@esbuild/aix-ppc64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz",
+      "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==",
+      "cpu": [
+        "ppc64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "aix"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/android-arm": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz",
+      "integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==",
+      "cpu": [
+        "arm"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/android-arm64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz",
+      "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/android-x64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz",
+      "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/darwin-arm64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz",
+      "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/darwin-x64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz",
+      "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/freebsd-arm64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz",
+      "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "freebsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/freebsd-x64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz",
+      "integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "freebsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-arm": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz",
+      "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==",
+      "cpu": [
+        "arm"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-arm64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz",
+      "integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-ia32": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz",
+      "integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==",
+      "cpu": [
+        "ia32"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-loong64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz",
+      "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==",
+      "cpu": [
+        "loong64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-mips64el": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz",
+      "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==",
+      "cpu": [
+        "mips64el"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-ppc64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz",
+      "integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==",
+      "cpu": [
+        "ppc64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-riscv64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz",
+      "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==",
+      "cpu": [
+        "riscv64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-s390x": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz",
+      "integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==",
+      "cpu": [
+        "s390x"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-x64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz",
+      "integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/netbsd-arm64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz",
+      "integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "netbsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/netbsd-x64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz",
+      "integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "netbsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/openbsd-arm64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz",
+      "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "openbsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/openbsd-x64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz",
+      "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "openbsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/openharmony-arm64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz",
+      "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "openharmony"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/sunos-x64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz",
+      "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "sunos"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/win32-arm64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz",
+      "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/win32-ia32": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz",
+      "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==",
+      "cpu": [
+        "ia32"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/win32-x64": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz",
+      "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@socket.io/component-emitter": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.2.tgz",
+      "integrity": "sha512-9BCxFwvbGg/RsZK9tjXd8s4UcwR0MWeFQ1XEKIQVVvAGJyINdrqKMcTRyLoK8Rse1GjzLV9cwjWV1olXRWEXVA==",
+      "license": "MIT"
+    },
+    "node_modules/base64-js": {
+      "version": "1.5.1",
+      "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
+      "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/feross"
+        },
+        {
+          "type": "patreon",
+          "url": "https://www.patreon.com/feross"
+        },
+        {
+          "type": "consulting",
+          "url": "https://feross.org/support"
+        }
+      ],
+      "license": "MIT"
+    },
+    "node_modules/debug": {
+      "version": "4.4.3",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
+      "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
+      "license": "MIT",
+      "dependencies": {
+        "ms": "^2.1.3"
+      },
+      "engines": {
+        "node": ">=6.0"
+      },
+      "peerDependenciesMeta": {
+        "supports-color": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/engine.io-client": {
+      "version": "6.6.4",
+      "resolved": "https://registry.npmjs.org/engine.io-client/-/engine.io-client-6.6.4.tgz",
+      "integrity": "sha512-+kjUJnZGwzewFDw951CDWcwj35vMNf2fcj7xQWOctq1F2i1jkDdVvdFG9kM/BEChymCH36KgjnW0NsL58JYRxw==",
+      "license": "MIT",
+      "dependencies": {
+        "@socket.io/component-emitter": "~3.1.0",
+        "debug": "~4.4.1",
+        "engine.io-parser": "~5.2.1",
+        "ws": "~8.18.3",
+        "xmlhttprequest-ssl": "~2.1.1"
+      }
+    },
+    "node_modules/engine.io-parser": {
+      "version": "5.2.3",
+      "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.2.3.tgz",
+      "integrity": "sha512-HqD3yTBfnBxIrbnM1DoD6Pcq8NECnh8d4As1Qgh0z5Gg3jRRIqijury0CL3ghu/edArpUYiYqQiDUQBIs4np3Q==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=10.0.0"
+      }
+    },
+    "node_modules/esbuild": {
+      "version": "0.27.7",
+      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz",
+      "integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==",
+      "hasInstallScript": true,
+      "license": "MIT",
+      "bin": {
+        "esbuild": "bin/esbuild"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "@esbuild/aix-ppc64": "0.27.7",
+        "@esbuild/android-arm": "0.27.7",
+        "@esbuild/android-arm64": "0.27.7",
+        "@esbuild/android-x64": "0.27.7",
+        "@esbuild/darwin-arm64": "0.27.7",
+        "@esbuild/darwin-x64": "0.27.7",
+        "@esbuild/freebsd-arm64": "0.27.7",
+        "@esbuild/freebsd-x64": "0.27.7",
+        "@esbuild/linux-arm": "0.27.7",
+        "@esbuild/linux-arm64": "0.27.7",
+        "@esbuild/linux-ia32": "0.27.7",
+        "@esbuild/linux-loong64": "0.27.7",
+        "@esbuild/linux-mips64el": "0.27.7",
+        "@esbuild/linux-ppc64": "0.27.7",
+        "@esbuild/linux-riscv64": "0.27.7",
+        "@esbuild/linux-s390x": "0.27.7",
+        "@esbuild/linux-x64": "0.27.7",
+        "@esbuild/netbsd-arm64": "0.27.7",
+        "@esbuild/netbsd-x64": "0.27.7",
+        "@esbuild/openbsd-arm64": "0.27.7",
+        "@esbuild/openbsd-x64": "0.27.7",
+        "@esbuild/openharmony-arm64": "0.27.7",
+        "@esbuild/sunos-x64": "0.27.7",
+        "@esbuild/win32-arm64": "0.27.7",
+        "@esbuild/win32-ia32": "0.27.7",
+        "@esbuild/win32-x64": "0.27.7"
+      }
+    },
+    "node_modules/fsevents": {
+      "version": "2.3.3",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
+      "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
+    "node_modules/get-tsconfig": {
+      "version": "4.13.7",
+      "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.7.tgz",
+      "integrity": "sha512-7tN6rFgBlMgpBML5j8typ92BKFi2sFQvIdpAqLA2beia5avZDrMs0FLZiM5etShWq5irVyGcGMEA1jcDaK7A/Q==",
+      "license": "MIT",
+      "dependencies": {
+        "resolve-pkg-maps": "^1.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
+      }
+    },
+    "node_modules/headers-polyfill": {
+      "version": "4.0.3",
+      "resolved": "https://registry.npmjs.org/headers-polyfill/-/headers-polyfill-4.0.3.tgz",
+      "integrity": "sha512-IScLbePpkvO846sIwOtOTDjutRMWdXdJmXdMvk6gCBHxFO8d+QKOQedyZSxFTTFYRSmlgSTDtXqqq4pcenBXLQ==",
+      "license": "MIT"
+    },
+    "node_modules/ms": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
+      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
+      "license": "MIT"
+    },
+    "node_modules/object-inspect": {
+      "version": "1.13.4",
+      "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz",
+      "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/prettier": {
+      "version": "3.8.2",
+      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.8.2.tgz",
+      "integrity": "sha512-8c3mgTe0ASwWAJK+78dpviD+A8EqhndQPUBpNUIPt6+xWlIigCwfN01lWr9MAede4uqXGTEKeQWTvzb3vjia0Q==",
+      "license": "MIT",
+      "bin": {
+        "prettier": "bin/prettier.cjs"
+      },
+      "engines": {
+        "node": ">=14"
+      },
+      "funding": {
+        "url": "https://github.com/prettier/prettier?sponsor=1"
+      }
+    },
+    "node_modules/pure-rand": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-7.0.1.tgz",
+      "integrity": "sha512-oTUZM/NAZS8p7ANR3SHh30kXB+zK2r2BPcEn/awJIbOvq82WoMN4p62AWWp3Hhw50G0xMsw1mhIBLqHw64EcNQ==",
+      "funding": [
+        {
+          "type": "individual",
+          "url": "https://github.com/sponsors/dubzzz"
+        },
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/fast-check"
+        }
+      ],
+      "license": "MIT"
+    },
+    "node_modules/resolve-pkg-maps": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
+      "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
+      }
+    },
+    "node_modules/safe-stable-stringify": {
+      "version": "2.5.0",
+      "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz",
+      "integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=10"
+      }
+    },
+    "node_modules/socket.io-client": {
+      "version": "4.8.3",
+      "resolved": "https://registry.npmjs.org/socket.io-client/-/socket.io-client-4.8.3.tgz",
+      "integrity": "sha512-uP0bpjWrjQmUt5DTHq9RuoCBdFJF10cdX9X+a368j/Ft0wmaVgxlrjvK3kjvgCODOMMOz9lcaRzxmso0bTWZ/g==",
+      "license": "MIT",
+      "dependencies": {
+        "@socket.io/component-emitter": "~3.1.0",
+        "debug": "~4.4.1",
+        "engine.io-client": "~6.6.1",
+        "socket.io-parser": "~4.2.4"
+      },
+      "engines": {
+        "node": ">=10.0.0"
+      }
+    },
+    "node_modules/socket.io-parser": {
+      "version": "4.2.6",
+      "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.6.tgz",
+      "integrity": "sha512-asJqbVBDsBCJx0pTqw3WfesSY0iRX+2xzWEWzrpcH7L6fLzrhyF8WPI8UaeM4YCuDfpwA/cgsdugMsmtz8EJeg==",
+      "license": "MIT",
+      "dependencies": {
+        "@socket.io/component-emitter": "~3.1.0",
+        "debug": "~4.4.1"
+      },
+      "engines": {
+        "node": ">=10.0.0"
+      }
+    },
+    "node_modules/spacetimedb": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/spacetimedb/-/spacetimedb-2.1.0.tgz",
+      "integrity": "sha512-Kzs+HXCRj15ryld03ztU4a2uQg0M8ivV/9Bk/gvMpb59lLc/A2/r7UkGCYBePsBL7Zwqgr8gE8FeufoZVXtPnA==",
+      "license": "ISC",
+      "dependencies": {
+        "base64-js": "^1.5.1",
+        "headers-polyfill": "^4.0.3",
+        "object-inspect": "^1.13.4",
+        "prettier": "^3.3.3",
+        "pure-rand": "^7.0.1",
+        "safe-stable-stringify": "^2.5.0",
+        "statuses": "^2.0.2",
+        "url-polyfill": "^1.1.14"
+      },
+      "peerDependencies": {
+        "@angular/core": ">=17.0.0",
+        "@tanstack/react-query": "^5.0.0",
+        "react": "^18.0.0 || ^19.0.0-0 || ^19.0.0",
+        "svelte": "^4.0.0 || ^5.0.0",
+        "undici": "^6.19.2",
+        "vue": "^3.3.0"
+      },
+      "peerDependenciesMeta": {
+        "@angular/core": {
+          "optional": true
+        },
+        "@tanstack/react-query": {
+          "optional": true
+        },
+        "react": {
+          "optional": true
+        },
+        "svelte": {
+          "optional": true
+        },
+        "undici": {
+          "optional": true
+        },
+        "vue": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/statuses": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz",
+      "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/tsx": {
+      "version": "4.21.0",
+      "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
+      "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
+      "license": "MIT",
+      "dependencies": {
+        "esbuild": "~0.27.0",
+        "get-tsconfig": "^4.7.5"
+      },
+      "bin": {
+        "tsx": "dist/cli.mjs"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      },
+      "optionalDependencies": {
+        "fsevents": "~2.3.3"
+      }
+    },
+    "node_modules/typescript": {
+      "version": "5.9.3",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
+      "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
+      "license": "Apache-2.0",
+      "bin": {
+        "tsc": "bin/tsc",
+        "tsserver": "bin/tsserver"
+      },
+      "engines": {
+        "node": ">=14.17"
+      }
+    },
+    "node_modules/url-polyfill": {
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/url-polyfill/-/url-polyfill-1.1.14.tgz",
+      "integrity": "sha512-p4f3TTAG6ADVF3mwbXw7hGw+QJyw5CnNGvYh5fCuQQZIiuKUswqcznyV3pGDP9j0TSmC4UvRKm8kl1QsX1diiQ==",
+      "license": "MIT"
+    },
+    "node_modules/ws": {
+      "version": "8.18.3",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
+      "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/xmlhttprequest-ssl": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/xmlhttprequest-ssl/-/xmlhttprequest-ssl-2.1.2.tgz",
+      "integrity": "sha512-TEU+nJVUUnA4CYJFLvK5X9AOeH4KvDvhIfm0vV1GaQRtchnG0hgK5p8hw/xjv8cunWYCsiPCSDzObPyhEwq3KQ==",
+      "engines": {
+        "node": ">=0.4.0"
+      }
+    }
+  }
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/package.json b/tools/llm-sequential-upgrade/perf-benchmark/package.json
new file mode 100644
index 00000000000..b8386d6318b
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "perf-benchmark",
+  "private": true,
+  "type": "module",
+  "version": "0.1.0",
+  "scripts": {
+    "run": "tsx src/main.ts"
+  },
+  "dependencies": {
+    "socket.io-client": "^4.7.4",
+    "spacetimedb": "^2.0.0",
+    "tsx": "^4.19.0",
+    "typescript": "^5.4.0"
+  }
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/clients/postgres-client.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/clients/postgres-client.ts
new file mode 100644
index 00000000000..6ba2b353f84
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/clients/postgres-client.ts
@@ -0,0 +1,114 @@
+// Postgres chat-app client wrapper for the perf benchmark.
+//
+// The Level 12 generated app exposes:
+//   POST /api/users { name } -> { id, name, ... }
+//   POST /api/rooms { name, userId, isPrivate: false } -> { id, ... }
+//   POST /api/rooms/:id/join { userId }
+//   socket.emit('register', { userId, userName })
+//   socket.emit('join_room', { roomId })
+//   socket.emit('send_message', { roomId, content })
+//   socket.on('message', cb)            // top-level messages broadcast to room subscribers
+//
+// Notes:
+// - The send_message handler enforces a 500ms per-user rate limit (server/src/index.ts).
+//   This means each writer can issue at most ~2 msgs/sec. Throughput must scale via writers.
+// - The handler does NOT call a socket.io ack callback. We treat the round-trip
+//   "send → server inserts → server emits 'message' back to me" as ack latency.
+// - All client connections in a single Node process share clocks, so fan-out latency
+//   measured by a separate listener client is meaningful.
+
+import { io, type Socket } from 'socket.io-client';
+
+export interface PgConfig {
+  baseUrl: string; // origin used for both the REST calls and the socket.io connection, e.g. http://localhost:6001
+}
+
+export interface PgUser {
+  id: number; // taken from the POST /api/users response; sent as `userId` in later calls
+  name: string; // sent as `userName` in the socket 'register' event
+}
+
+// Create a chat user through POST /api/users and return the parsed response body.
+// Throws an Error carrying the status and response text when the call is not OK.
+export async function createPgUser(cfg: PgConfig, name: string): Promise<PgUser> {
+  // The same value goes out as `name` and `username` because the generated apps
+  // disagree on the field (20260406 reads `name`, 20260403 reads `username`).
+  const payload = JSON.stringify({ name, username: name });
+  const headers = { 'content-type': 'application/json' };
+  const response = await fetch(`${cfg.baseUrl}/api/users`, { method: 'POST', headers, body: payload });
+  if (response.ok) return (await response.json()) as PgUser;
+  throw new Error(`createPgUser ${name} failed: ${response.status} ${await response.text()}`);
+}
+
+// Create a public room (isPrivate: false) via POST /api/rooms, passing `userId` along.
+// Resolves to the parsed response body; throws (status + response text) when not OK.
+export async function createPgRoom(cfg: PgConfig, name: string, userId: number): Promise<{ id: number }> {
+  const payload = JSON.stringify({ name, userId, isPrivate: false });
+  const headers = { 'content-type': 'application/json' };
+  const response = await fetch(`${cfg.baseUrl}/api/rooms`, { method: 'POST', headers, body: payload });
+  if (response.ok) return (await response.json()) as { id: number };
+  throw new Error(`createPgRoom ${name} failed: ${response.status} ${await response.text()}`);
+}
+
+// Add `userId` to room `roomId` via POST /api/rooms/:id/join. Resolves with no value;
+// throws an Error carrying the status and response text when the call is not OK.
+export async function joinPgRoom(cfg: PgConfig, roomId: number, userId: number): Promise<void> {
+  const headers = { 'content-type': 'application/json' };
+  const endpoint = `${cfg.baseUrl}/api/rooms/${roomId}/join`;
+  const response = await fetch(endpoint, { method: 'POST', headers, body: JSON.stringify({ userId }) });
+  if (!response.ok) throw new Error(`joinPgRoom ${roomId} failed: ${response.status} ${await response.text()}`);
+}
+
+export interface PgClientHandle {
+  socket: Socket; // socket.io connection opened by connectPgClient
+  user: PgUser; // the user that connection was registered as
+  close(): void; // connectPgClient implements this as a best-effort socket.disconnect()
+}
+
+// Open a websocket-only socket.io connection to cfg.baseUrl, emit 'register' and
+// 'join_room' for `user` / `roomId`, and forward every 'message' or 'new_message'
+// event to `onMessage`. Rejects on connect_error or after 10s without a connect;
+// in both failure cases the socket is disconnected before the promise rejects.
+export async function connectPgClient(
+  cfg: PgConfig,
+  user: PgUser,
+  roomId: number,
+  onMessage: (msg: { id: number; roomId: number; userId: number; content: string }) => void,
+): Promise<PgClientHandle> {
+  const socket = io(cfg.baseUrl, { transports: ['websocket'], reconnection: false, forceNew: true });
+  await new Promise<void>((resolve, reject) => {
+    // On failure, drop the half-open socket so it cannot outlive the rejected promise.
+    const fail = (err: Error): void => { clearTimeout(timer); socket.disconnect(); reject(err); };
+    // Cleared on success as well: a pending timer keeps the Node process alive for 10s.
+    const timer = setTimeout(() => fail(new Error('socket connect timeout')), 10_000);
+    socket.once('connect', () => { clearTimeout(timer); resolve(); });
+    socket.once('connect_error', fail);
+  });
+  socket.emit('register', { userId: user.id, userName: user.name });
+  socket.emit('join_room', { roomId });
+  // Listen for both event names — 20260406 uses 'message', 20260403 uses 'new_message'
+  socket.on('message', onMessage);
+  socket.on('new_message', onMessage);
+  const close = (): void => {
+    try { socket.disconnect(); } catch { /* ignore */ }
+  };
+  return { socket, user, close };
+}
+
+// Fire-and-forget send over the socket (20260406 protocol). A server that only accepts
+// REST sends (20260403) silently drops the event — use pgSendRest for those apps.
+export function pgSend(handle: PgClientHandle, roomId: number, content: string): void {
+  const payload = { roomId, content };
+  handle.socket.emit('send_message', payload);
+}
+
+// REST-based send for 20260403 PG (POST /api/rooms/:roomId/messages). Resolves to the
+// parsed JSON response body, or to null when the response status is not OK.
+export async function pgSendRest(cfg: PgConfig, roomId: number, userId: number, content: string): Promise<any> {
+  const headers = { 'content-type': 'application/json' };
+  const url = `${cfg.baseUrl}/api/rooms/${roomId}/messages`;
+  const res = await fetch(url, { method: 'POST', headers, body: JSON.stringify({ userId, content }) });
+  if (res.ok) return res.json();
+  await res.body?.cancel().catch(() => undefined); // discard the unread error body to free the connection
+  return null;
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/clients/spacetime-client.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/clients/spacetime-client.ts
new file mode 100644
index 00000000000..d6cda971eec
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/clients/spacetime-client.ts
@@ -0,0 +1,104 @@
+// SpacetimeDB chat-app client wrapper for the perf benchmark.
+//
+// The Level 12 generated module exposes:
+//   reducer set_name({ name })             — register the calling identity as a user
+//   reducer create_room({ name, isPrivate }) — create a public room (auto-joins creator)
+//   reducer join_room({ roomId })            — join an existing public room
+//   reducer send_message({ roomId, text })   — insert a message into a room
+//
+// Bindings live in ../module_bindings, i.e. src/module_bindings (regenerate via
+// `spacetime generate ...` — see README). Each connection gets its own anonymous
+// identity, so N writers = N independent connections, with no per-user rate limiting.
+
+import { DbConnection } from '../module_bindings/index.ts';
+
+export interface StdbConfig {
+  uri: string; // handed to DbConnection.builder().withUri(), e.g. ws://localhost:3000
+  moduleName: string; // handed to withDatabaseName(), e.g. chat-app-<timestamp>
+}
+
+export interface StdbHandle {
+  conn: InstanceType<typeof DbConnection>; // connection instance that connectStdb resolved with
+  close(): void; // connectStdb implements this as a best-effort conn.disconnect()
+}
+
+// Connect to the module and resolve once the subscription to `opts.subscriptions`
+// (default: user, room, room_member, message) is applied, or right after connecting when
+// that list is empty. Rejects on a connect error, a subscription error, or after 15s.
+export async function connectStdb(
+  cfg: StdbConfig,
+  opts: {
+    onMessage?: (row: { id: bigint; roomId: bigint; text: string }) => void;
+    subscriptions?: string[];
+  } = {},
+): Promise<StdbHandle> {
+  const subscriptions = opts.subscriptions ?? [
+    'SELECT * FROM user',
+    'SELECT * FROM room',
+    'SELECT * FROM room_member',
+    'SELECT * FROM message',
+  ];
+
+  const conn = await new Promise<InstanceType<typeof DbConnection>>((resolve, reject) => {
+    // Every settle path clears the watchdog; left pending it keeps Node alive for 15s.
+    const ready = (connection: InstanceType<typeof DbConnection>): void => { clearTimeout(watchdog); resolve(connection); };
+    const fail = (err: Error): void => { clearTimeout(watchdog); reject(err); };
+    const watchdog = setTimeout(() => {
+      try { (pending as any).disconnect?.(); } catch { /* ignore */ } // best-effort teardown
+      fail(new Error('stdb connect timeout'));
+    }, 15_000);
+    const pending = DbConnection.builder()
+      .withUri(cfg.uri)
+      .withDatabaseName(cfg.moduleName)
+      .onConnect((connection) => {
+        if (subscriptions.length === 0) { ready(connection); return; }
+        connection
+          .subscriptionBuilder()
+          .onApplied(() => ready(connection))
+          .onError((ctx: { event?: Error }) => fail(ctx.event ?? new Error('subscription error')))
+          .subscribe(subscriptions);
+      })
+      .onConnectError((_ctx: unknown, err: Error) => fail(err))
+      .build();
+  });
+
+  // Forward message-row inserts; the accessor is the camelCase table name (untyped here).
+  if (opts.onMessage) {
+    (conn.db as any).message.onInsert((_ctx: unknown, row: { id: bigint; roomId: bigint; text: string }) => opts.onMessage!(row));
+  }
+  const close = (): void => {
+    try { (conn as any).disconnect?.(); } catch { /* ignore */ }
+  };
+  return { conn, close };
+}
+
+// Register the calling identity under `name`, using whichever reducer the generated
+// module provides: `setName` (20260406) is preferred over `register` (20260403).
+// Throws when the bindings expose neither.
+export async function stdbSetName(h: StdbHandle, name: string): Promise<void> {
+  const reducers = h.conn.reducers as any;
+  const reducerName = ['setName', 'register'].find((key) => typeof reducers[key] === 'function');
+  if (reducerName === undefined) {
+    throw new Error('No setName or register reducer found');
+  }
+  await reducers[reducerName]({ name });
+}
+
+export async function stdbCreateRoom(h: StdbHandle, name: string): Promise<void> {
+  await (h.conn.reducers as any).createRoom({ name, isPrivate: false }); // untyped reducer call; the room is always created public
+}
+
+export async function stdbJoinRoom(h: StdbHandle, roomId: bigint): Promise<void> {
+  await (h.conn.reducers as any).joinRoom({ roomId }); // untyped call to the module's joinRoom reducer
+}
+
+export async function stdbSendMessage(h: StdbHandle, roomId: bigint, text: string): Promise<void> {
+  await (h.conn.reducers as any).sendMessage({ roomId, text }); // untyped call to the module's sendMessage reducer
+}
+
+// Look up a room id by name in the subscribed `room` table; null when nothing matches.
+export function stdbFindRoomIdByName(h: StdbHandle, name: string): bigint | null {
+  const rooms = (h.conn.db as any).room.iter() as Iterable<{ id: bigint; name: string }>;
+  for (const room of rooms) if (room.name === name) return room.id;
+  return null;
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/generate-summary.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/generate-summary.ts
new file mode 100644
index 00000000000..1b0a484488f
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/generate-summary.ts
@@ -0,0 +1,96 @@
+// Reads all per-scenario JSON results from a directory and emits summary.md
+// and summary.json side-by-side comparing PG vs STDB.
+
+import { readdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import type { ScenarioResult } from './metrics.ts';
+
+// Parses every *.json file directly inside `dir` as a ScenarioResult. A file named summary.json
+// (the name this script writes its own output under) is skipped so it is never read back as a result.
+function loadAll(dir: string): ScenarioResult[] {
+  const isResultFile = (file: string): boolean => file.endsWith('.json') && file !== 'summary.json';
+  return readdirSync(dir)
+    .filter(isResultFile)
+    .map((file) => JSON.parse(readFileSync(join(dir, file), 'utf8')) as ScenarioResult);
+}
+
+function fmt(n: number, digits = 1): string { // fixed-point rendering for table cells
+  return n.toFixed(digits); // e.g. fmt(3.14159) === '3.1', fmt(3.6, 0) === '4'
+}
+
+// Builds summary.md and summary.json from the per-scenario results. Args: [pgDir] [stdbDir] [outDir].
+function main(): void {
+  const args = process.argv.slice(2);
+  const pgDir = args[0] ?? 'results/full-pg';
+  const stdbDir = args[1] ?? 'results/full-stdb';
+  const outDir = args[2] ?? 'results';
+
+  const pg = loadAll(pgDir);
+  const stdb = loadAll(stdbDir);
+  const byScenario = (rs: ScenarioResult[], s: string): ScenarioResult | undefined =>
+    rs.find((r) => r.scenario === s);
+
+  const scenarios = ['stress-throughput', 'realistic-chat'] as const;
+
+  const lines: string[] = [];
+  lines.push('# Perf Benchmark Summary - PG vs STDB Chat Apps', '');
+  lines.push('Runtime performance of the **Level 12 chat apps the LLM built** in the sequential upgrade benchmark.');
+  lines.push('Both apps run on the same dev machine against a local DB. Numbers reflect what shipped, not the theoretical ceiling of either backend.');
+  lines.push('');
+  // One comparison table per scenario; a scenario is skipped only when neither backend produced a result.
+  for (const sc of scenarios) {
+    const p = byScenario(pg, sc);
+    const s = byScenario(stdb, sc);
+    if (!p && !s) continue;
+    lines.push(`## ${sc}`);
+    lines.push('');
+    lines.push('| Metric | PostgreSQL | SpacetimeDB |');
+    lines.push('|---|---|---|');
+    lines.push(`| Sustained throughput (msgs/sec) | ${p ? fmt(p.msgsPerSec) : '-'} | ${s ? fmt(s.msgsPerSec) : '-'} |`);
+    lines.push(`| Messages received | ${p?.received ?? '-'} | ${s?.received ?? '-'} |`);
+    lines.push(`| Fan-out latency p50 (ms) | ${p ? fmt(p.fanoutLatencyMs.p50) : '-'} | ${s ? fmt(s.fanoutLatencyMs.p50) : '-'} |`);
+    lines.push(`| Fan-out latency p99 (ms) | ${p ? fmt(p.fanoutLatencyMs.p99) : '-'} | ${s ? fmt(s.fanoutLatencyMs.p99) : '-'} |`);
+    if (p?.ackLatencyMs.count || s?.ackLatencyMs.count) {
+      lines.push(`| Ack latency p50 (ms) | ${p?.ackLatencyMs.count ? fmt(p.ackLatencyMs.p50) : '-'} | ${s?.ackLatencyMs.count ? fmt(s.ackLatencyMs.p50) : '-'} |`);
+      lines.push(`| Ack latency p99 (ms) | ${p?.ackLatencyMs.count ? fmt(p.ackLatencyMs.p99) : '-'} | ${s?.ackLatencyMs.count ? fmt(s.ackLatencyMs.p99) : '-'} |`);
+    }
+    if (p?.notes) lines.push(`\n**PG note:** ${p.notes}`);
+    if (s?.notes) lines.push(`\n**STDB note:** ${s.notes}`);
+    lines.push('');
+  }
+  // The headline needs both stress results and a non-zero PG throughput; otherwise the ratio would be Infinity or NaN.
+  const stress = { pg: byScenario(pg, 'stress-throughput'), stdb: byScenario(stdb, 'stress-throughput') };
+  const ratio = stress.pg && stress.stdb && stress.pg.msgsPerSec > 0 ? stress.stdb.msgsPerSec / stress.pg.msgsPerSec : null;
+  if (stress.pg && stress.stdb && ratio !== null) {
+    lines.push('## Headline');
+    lines.push('');
+    lines.push(`Under stress, the SpacetimeDB app delivered **${fmt(ratio, 0)}x the throughput** of the PostgreSQL app `);
+    lines.push(`(${fmt(stress.stdb.msgsPerSec)} vs ${fmt(stress.pg.msgsPerSec)} msgs/sec)`);
+    lines.push(`with comparable p99 fan-out latency (${fmt(stress.stdb.fanoutLatencyMs.p99)}ms vs ${fmt(stress.pg.fanoutLatencyMs.p99)}ms).`);
+    lines.push('');
+    lines.push('The PG send_message handler serializes 5 DB queries per message (ban check, membership check,');
+    lines.push('`lastSeen` update, insert, roomMembers query for notifications) - all awaited, no batching.');
+    lines.push('The SpacetimeDB reducer does a single transaction. **This is what shipped from the same prompt** -');
+    lines.push('the LLM reached for a familiar REST pattern on PG and a minimal reducer on STDB, and the');
+    lines.push("generated code's structure dominates the throughput gap.");
+  }
+
+  writeFileSync(join(outDir, 'summary.md'), lines.join('\n'));
+
+  const summary = {
+    pg: Object.fromEntries(pg.map((r) => [r.scenario, r])),
+    stdb: Object.fromEntries(stdb.map((r) => [r.scenario, r])),
+    headline: stress.pg && stress.stdb ? {
+      stressMsgsPerSecPg: stress.pg.msgsPerSec,
+      stressMsgsPerSecStdb: stress.stdb.msgsPerSec,
+      stressRatio: ratio, // null when PG throughput is 0 (JSON cannot represent Infinity/NaN anyway)
+      stressP99FanoutPg: stress.pg.fanoutLatencyMs.p99,
+      stressP99FanoutStdb: stress.stdb.fanoutLatencyMs.p99,
+    } : null,
+  };
+  writeFileSync(join(outDir, 'summary.json'), JSON.stringify(summary, null, 2));
+
+  console.log(`Wrote ${join(outDir, 'summary.md')} and summary.json`);
+}
+
+main();
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/main.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/main.ts
new file mode 100644
index 00000000000..34a07018ad4
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/main.ts
@@ -0,0 +1,125 @@
+// CLI entry point for the perf benchmark.
+//
+// Usage:
+//   tsx src/main.ts --backend pg --scenario stress      [--writers 50] [--duration 60]
+//   tsx src/main.ts --backend stdb --scenario realistic [--users 100] [--duration 120]
+//   tsx src/main.ts --backend stdb --scenario all
+//
+// PG defaults: http://localhost:6001
+// STDB defaults: ws://localhost:3000, module from --module flag
+
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import {
+  runStressPostgres,
+  runStressSpacetime,
+  type StressOpts,
+} from './scenarios/stress-throughput.ts';
+import {
+  runRealisticPostgres,
+  runRealisticSpacetime,
+  type RealisticOpts,
+} from './scenarios/realistic-chat.ts';
+import type { ScenarioResult } from './metrics.ts';
+
+interface CliArgs { // parsed command-line options; see parseArgs for the flag spellings
+  backend: 'pg' | 'stdb'; // --backend, default 'pg'
+  scenario: 'stress' | 'realistic' | 'all'; // --scenario, default 'stress'; 'all' runs stress then realistic
+  pgUrl: string; // --pg-url, default http://localhost:6001
+  stdbUri: string; // --stdb-uri, default ws://localhost:3000
+  stdbModule: string; // --module; must be non-empty when backend is 'stdb'
+  writers: number; // --writers, default 20; passed to the stress scenario
+  users: number; // --users, default 50; passed to the realistic scenario
+  duration: number; // --duration, default 30; passed to both scenarios as durationSec
+  out: string; // --out; output directory, empty means ../results/<timestamp> relative to this file
+}
+
+// Parses `--flag value` pairs into CliArgs. Unknown tokens are ignored; invalid values throw.
+function parseArgs(argv: string[]): CliArgs {
+  const a: CliArgs = {
+    backend: 'pg', scenario: 'stress', pgUrl: 'http://localhost:6001', stdbUri: 'ws://localhost:3000',
+    stdbModule: '', writers: 20, users: 50, duration: 30, out: '',
+  };
+  const int = (flag: string, raw?: string): number => { // fail fast instead of carrying NaN into a run
+    const n = Number.parseInt(raw ?? '', 10);
+    if (Number.isNaN(n)) throw new Error(`${flag} expects an integer, got '${raw}'`);
+    return n;
+  };
+  for (let i = 0; i < argv.length; i++) {
+    const k = argv[i], v = argv[i + 1];
+    switch (k) {
+      case '--backend': a.backend = v as 'pg' | 'stdb'; i++; break;
+      case '--scenario': a.scenario = v as CliArgs['scenario']; i++; break;
+      case '--pg-url': a.pgUrl = v!; i++; break;
+      case '--stdb-uri': a.stdbUri = v!; i++; break;
+      case '--module': a.stdbModule = v!; i++; break;
+      case '--writers': a.writers = int(k, v); i++; break;
+      case '--users': a.users = int(k, v); i++; break;
+      case '--duration': a.duration = int(k, v); i++; break;
+      case '--out': a.out = v!; i++; break;
+    }
+  }
+  if (!['pg', 'stdb'].includes(a.backend)) throw new Error(`--backend must be pg or stdb, got '${a.backend}'`);
+  if (!['stress', 'realistic', 'all'].includes(a.scenario)) throw new Error(`--scenario must be stress, realistic or all, got '${a.scenario}'`);
+  return a;
+}
+
+async function runOne(args: CliArgs, scenario: 'stress' | 'realistic'): Promise<ScenarioResult> {
+  // The workload options are the same for both backends; only the connection config differs.
+  const stressOpts = { writers: args.writers, durationSec: args.duration };
+  const realisticOpts = { users: args.users, durationSec: args.duration, minIntervalMs: 5000, maxIntervalMs: 15000 };
+  if (args.backend !== 'pg') {
+    if (!args.stdbModule) throw new Error('--module is required for stdb');
+    const stdb = { uri: args.stdbUri, moduleName: args.stdbModule };
+    return scenario === 'stress' ? runStressSpacetime(stdb, stressOpts) : runRealisticSpacetime(stdb, realisticOpts);
+  }
+  const pg = { baseUrl: args.pgUrl };
+  return scenario === 'stress' ? runStressPostgres(pg, stressOpts) : runRealisticPostgres(pg, realisticOpts);
+}
+
+function summarize(r: ScenarioResult): string {
+  // Renders one latency family; `label` carries its own padding so the p50 columns line up.
+  const latencyLine = (label: string, s: ScenarioResult['ackLatencyMs']): string =>
+    `  ${label}p50=${s.p50.toFixed(1)}ms p99=${s.p99.toFixed(1)}ms (n=${s.count})`;
+  let text = `[${r.backend}] ${r.scenario}: ${r.received}/${r.sent} msgs in ${r.durationSec}s`;
+  text += `\n  throughput: ${r.msgsPerSec.toFixed(1)} msgs/sec`;
+  text += `\n${latencyLine('ack       ', r.ackLatencyMs)}`;
+  text += `\n${latencyLine('fanout    ', r.fanoutLatencyMs)}`;
+  if (r.notes) text += `\n  note: ${r.notes}`;
+  return text;
+}
+
+// Runs the requested scenario(s) for one backend and writes one JSON result file per scenario.
+async function main(): Promise<void> {
+  const args = parseArgs(process.argv.slice(2));
+  const __dirname = dirname(fileURLToPath(import.meta.url));
+  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
+  const outDir = args.out || join(__dirname, '..', 'results', stamp);
+  mkdirSync(outDir, { recursive: true });
+
+  const scenarios: Array<'stress' | 'realistic'> =
+    args.scenario === 'all' ? ['stress', 'realistic'] : [args.scenario];
+
+  for (const sc of scenarios) {
+    console.log(`\n=== ${args.backend} / ${sc} ===`);
+    try {
+      const r = await runOne(args, sc);
+      console.log(summarize(r));
+      writeFileSync(
+        join(outDir, `${args.backend}-${sc}.json`),
+        JSON.stringify(r, (_k, v) => (typeof v === 'bigint' ? v.toString() : v), 2), // JSON has no bigint; emit as string
+      );
+    } catch (err) {
+      // Keep going so any remaining scenario still runs, but make the process exit non-zero.
+      console.error(`FAILED ${args.backend}/${sc}:`, err);
+      process.exitCode = 1;
+    }
+  }
+  console.log(`\nResults written to ${outDir}`);
+}
+
+main().catch((err) => { // last-resort handler for anything main() rejects with
+  console.error(err);
+  process.exit(1); // exit with status 1 after logging the error
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/metrics.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/metrics.ts
new file mode 100644
index 00000000000..c41a5f16a03
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/metrics.ts
@@ -0,0 +1,81 @@
+// Simple latency aggregator: stores raw samples in ms, computes percentiles on demand.
+// For our scenarios (≤ a few hundred thousand samples) this is plenty.
+
+export class LatencyHistogram {
+  private samples: number[] = [];
+
+  /** Appends one latency sample, in milliseconds. */
+  record(ms: number): void {
+    this.samples.push(ms);
+  }
+
+  /** Number of samples recorded so far. */
+  count(): number {
+    return this.samples.length;
+  }
+
+  /** Sorts a copy of the samples and reads min/max/mean/percentiles off it; every field is 0 when empty. */
+  summary(): LatencySummary {
+    const n = this.samples.length;
+    if (n === 0) return { count: 0, min: 0, max: 0, mean: 0, p50: 0, p95: 0, p99: 0, p999: 0 };
+    const ordered = this.samples.slice().sort((x, y) => x - y);
+    // Percentile p is the sorted sample at index floor(p * n), clamped to the last element.
+    const at = (p: number): number => ordered[Math.min(n - 1, Math.floor(p * n))]!;
+    let total = 0;
+    for (const v of ordered) total += v;
+    return {
+      count: n, min: ordered[0]!, max: ordered[n - 1]!,
+      mean: total / n,
+      p50: at(0.50), p95: at(0.95),
+      p99: at(0.99), p999: at(0.999),
+    };
+  }
+}
+
+export interface LatencySummary { // result of LatencyHistogram.summary()
+  count: number; // number of recorded samples; when 0, every other field is a placeholder 0
+  min: number; // min/max/mean and the percentiles are in milliseconds, as passed to LatencyHistogram.record
+  max: number;
+  mean: number;
+  p50: number; // percentile p is the sorted sample at index floor(p * count), clamped to the last sample
+  p95: number;
+  p99: number;
+  p999: number; // 99.9th percentile
+}
+
+export interface ScenarioResult { // one benchmark run for one backend; written to disk as JSON by the CLI
+  scenario: string; // scenario id; generate-summary.ts looks results up by 'stress-throughput' / 'realistic-chat'
+  backend: 'postgres' | 'spacetime';
+  startedAt: string; // presumably an ISO timestamp of the run start - TODO confirm against the scenario runners
+  durationSec: number;
+  writers: number;
+  sent: number; // printed by the CLI as `received/sent msgs`
+  received: number;
+  errors: number;
+  msgsPerSec: number; // reported as "Sustained throughput (msgs/sec)" in the summary
+  ackLatencyMs: LatencySummary; // count may be 0; the summary omits the ack rows when neither backend has ack samples
+  fanoutLatencyMs: LatencySummary;
+  notes?: string; // optional free-text note, printed verbatim in the CLI output and the summary
+}
+
+// Benchmark messages carry their send time and sequence number inside the text itself
+// (`__bench:<hrtime ns>:<seq>:hello`) so the listener can compute fan-out latency on receipt.
+const MARKER = '__bench:';
+/** Builds the stamped text for message number `seq` from the current `process.hrtime.bigint()` reading. */
+export function stampMessage(seq: number): string {
+  return `${MARKER}${process.hrtime.bigint().toString()}:${seq}:hello`;
+}
+
+/** Inverse of `stampMessage`: returns the embedded timestamp and sequence, or null if `text` is not a well-formed stamp. */
+export function parseStamp(text: string): { sentNs: bigint; seq: number } | null {
+  if (!text.startsWith(MARKER)) return null;
+  const [ns, seq] = text.slice(MARKER.length).split(':');
+  // Require plain decimal digits up front: BigInt('') yields 0n and parseInt('x') yields NaN without
+  // throwing, so a try/catch alone would let malformed stamps through as bogus latency samples.
+  if (ns === undefined || seq === undefined || !/^\d+$/.test(ns) || !/^\d+$/.test(seq)) return null;
+  return { sentNs: BigInt(ns), seq: Number.parseInt(seq, 10) };
+}
+
+export function nsToMs(deltaNs: bigint): number {
+  return Number(deltaNs) / 1e6; // nanoseconds -> milliseconds, fractional part kept
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/accept_invitation_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/accept_invitation_reducer.ts
new file mode 100644
index 00000000000..5cbc2cea6af
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/accept_invitation_reducer.ts
@@ -0,0 +1,15 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default {
+  invitationId: __t.u64(),
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/banned_user_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/banned_user_table.ts
new file mode 100644
index 00000000000..023c08fc271
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/banned_user_table.ts
@@ -0,0 +1,17 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({
+  id: __t.u64().primaryKey(),
+  roomId: __t.u64().name("room_id"),
+  userIdentity: __t.identity().name("user_identity"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/cancel_scheduled_message_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/cancel_scheduled_message_reducer.ts
new file mode 100644
index 00000000000..a6a1d54143f
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/cancel_scheduled_message_reducer.ts
@@ -0,0 +1,15 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default {
+  scheduledId: __t.u64(),
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/create_dm_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/create_dm_reducer.ts
new file mode 100644
index 00000000000..97f48e45468
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/create_dm_reducer.ts
@@ -0,0 +1,15 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default {
+  targetName: __t.string(),
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/create_room_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/create_room_reducer.ts
new file mode 100644
index 00000000000..d25591473d5
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/create_room_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default {
+  name: __t.string(),
+  isPrivate: __t.bool(),
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/decline_invitation_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/decline_invitation_reducer.ts
new file mode 100644
index 00000000000..5cbc2cea6af
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/decline_invitation_reducer.ts
@@ -0,0 +1,15 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default {
+  invitationId: __t.u64(),
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/draft_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/draft_table.ts
new file mode 100644
index 00000000000..43d5b5648b4
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/draft_table.ts
@@ -0,0 +1,19 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({
+  id: __t.u64().primaryKey(),
+  roomId: __t.u64().name("room_id"),
+  userIdentity: __t.identity().name("user_identity"),
+  text: __t.string(),
+  updatedAt: __t.timestamp().name("updated_at"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/edit_message_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/edit_message_reducer.ts
new file mode 100644
index 00000000000..1418afbd390
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/edit_message_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default {
+  messageId: __t.u64(),
+  newText: __t.string(),
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/index.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/index.ts
new file mode 100644
index 00000000000..ff357b3e747
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/index.ts
@@ -0,0 +1,370 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+// This was generated using spacetimedb cli version 2.1.0 (commit 6981f48b4bc1a71c8dd9bdfe5a2c343f6370243d).
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  DbConnectionBuilder as __DbConnectionBuilder,
+  DbConnectionImpl as __DbConnectionImpl,
+  SubscriptionBuilderImpl as __SubscriptionBuilderImpl,
+  TypeBuilder as __TypeBuilder,
+  Uuid as __Uuid,
+  convertToAccessorMap as __convertToAccessorMap,
+  makeQueryBuilder as __makeQueryBuilder,
+  procedureSchema as __procedureSchema,
+  procedures as __procedures,
+  reducerSchema as __reducerSchema,
+  reducers as __reducers,
+  schema as __schema,
+  t as __t,
+  table as __table,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type DbConnectionConfig as __DbConnectionConfig,
+  type ErrorContextInterface as __ErrorContextInterface,
+  type Event as __Event,
+  type EventContextInterface as __EventContextInterface,
+  type Infer as __Infer,
+  type QueryBuilder as __QueryBuilder,
+  type ReducerEventContextInterface as __ReducerEventContextInterface,
+  type RemoteModule as __RemoteModule,
+  type SubscriptionEventContextInterface as __SubscriptionEventContextInterface,
+  type SubscriptionHandleImpl as __SubscriptionHandleImpl,
+} from "spacetimedb";
+
+// Import all reducer arg schemas
+import AcceptInvitationReducer from "./accept_invitation_reducer";
+import CancelScheduledMessageReducer from "./cancel_scheduled_message_reducer";
+import CreateDmReducer from "./create_dm_reducer";
+import CreateRoomReducer from "./create_room_reducer";
+import DeclineInvitationReducer from "./decline_invitation_reducer";
+import EditMessageReducer from "./edit_message_reducer";
+import InviteUserReducer from "./invite_user_reducer";
+import JoinRoomReducer from "./join_room_reducer";
+import KickUserReducer from "./kick_user_reducer";
+import LeaveRoomReducer from "./leave_room_reducer";
+import MarkReadReducer from "./mark_read_reducer";
+import PromoteUserReducer from "./promote_user_reducer";
+import RegisterReducer from "./register_reducer";
+import ReplyToMessageReducer from "./reply_to_message_reducer";
+import SaveDraftReducer from "./save_draft_reducer";
+import ScheduleMessageReducer from "./schedule_message_reducer";
+import SendEphemeralMessageReducer from "./send_ephemeral_message_reducer";
+import SendMessageReducer from "./send_message_reducer";
+import SetStatusReducer from "./set_status_reducer";
+import SetTypingReducer from "./set_typing_reducer";
+import ToggleReactionReducer from "./toggle_reaction_reducer";
+
+// Import all procedure arg schemas
+
+// Import all table schema definitions
+import MessageRow from "./message_table";
+import MessageDraftRow from "./message_draft_table";
+import MessageEditRow from "./message_edit_table";
+import MessageReactionRow from "./message_reaction_table";
+import ReadReceiptRow from "./read_receipt_table";
+import RoomRow from "./room_table";
+import RoomBanRow from "./room_ban_table";
+import RoomInvitationRow from "./room_invitation_table";
+import RoomMemberRow from "./room_member_table";
+import ScheduledMessageRow from "./scheduled_message_table";
+import ThreadReplyRow from "./thread_reply_table";
+import TypingIndicatorRow from "./typing_indicator_table";
+import UserRow from "./user_table";
+
+/** Type-only namespace exports for generated type groups. */
+
+/** The schema information for all tables in this module. This is defined the same was as the tables would have been defined in the server. */
+const tablesSchema = __schema({
+  message: __table({
+    name: 'message',
+    indexes: [
+      { accessor: 'id', name: 'message_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'roomId', name: 'message_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+    ],
+    constraints: [
+      { name: 'message_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, MessageRow),
+  messageDraft: __table({
+    name: 'message_draft',
+    indexes: [
+      { accessor: 'id', name: 'message_draft_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'roomId', name: 'message_draft_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+      { accessor: 'userIdentity', name: 'message_draft_user_identity_idx_btree', algorithm: 'btree', columns: [
+        'userIdentity',
+      ] },
+    ],
+    constraints: [
+      { name: 'message_draft_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, MessageDraftRow),
+  messageEdit: __table({
+    name: 'message_edit',
+    indexes: [
+      { accessor: 'id', name: 'message_edit_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'messageId', name: 'message_edit_message_id_idx_btree', algorithm: 'btree', columns: [
+        'messageId',
+      ] },
+    ],
+    constraints: [
+      { name: 'message_edit_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, MessageEditRow),
+  messageReaction: __table({
+    name: 'message_reaction',
+    indexes: [
+      { accessor: 'id', name: 'message_reaction_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'messageId', name: 'message_reaction_message_id_idx_btree', algorithm: 'btree', columns: [
+        'messageId',
+      ] },
+      { accessor: 'roomId', name: 'message_reaction_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+      { accessor: 'userIdentity', name: 'message_reaction_user_identity_idx_btree', algorithm: 'btree', columns: [
+        'userIdentity',
+      ] },
+    ],
+    constraints: [
+      { name: 'message_reaction_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, MessageReactionRow),
+  readReceipt: __table({
+    name: 'read_receipt',
+    indexes: [
+      { accessor: 'id', name: 'read_receipt_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'roomId', name: 'read_receipt_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+      { accessor: 'userIdentity', name: 'read_receipt_user_identity_idx_btree', algorithm: 'btree', columns: [
+        'userIdentity',
+      ] },
+    ],
+    constraints: [
+      { name: 'read_receipt_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, ReadReceiptRow),
+  room: __table({
+    name: 'room',
+    indexes: [
+      { accessor: 'id', name: 'room_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'name', name: 'room_name_idx_btree', algorithm: 'btree', columns: [
+        'name',
+      ] },
+    ],
+    constraints: [
+      { name: 'room_id_key', constraint: 'unique', columns: ['id'] },
+      { name: 'room_name_key', constraint: 'unique', columns: ['name'] },
+    ],
+  }, RoomRow),
+  roomBan: __table({
+    name: 'room_ban',
+    indexes: [
+      { accessor: 'id', name: 'room_ban_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'roomId', name: 'room_ban_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+      { accessor: 'userIdentity', name: 'room_ban_user_identity_idx_btree', algorithm: 'btree', columns: [
+        'userIdentity',
+      ] },
+    ],
+    constraints: [
+      { name: 'room_ban_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, RoomBanRow),
+  roomInvitation: __table({
+    name: 'room_invitation',
+    indexes: [
+      { accessor: 'id', name: 'room_invitation_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'invitedUser', name: 'room_invitation_invited_user_idx_btree', algorithm: 'btree', columns: [
+        'invitedUser',
+      ] },
+      { accessor: 'roomId', name: 'room_invitation_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+    ],
+    constraints: [
+      { name: 'room_invitation_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, RoomInvitationRow),
+  roomMember: __table({
+    name: 'room_member',
+    indexes: [
+      { accessor: 'id', name: 'room_member_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'roomId', name: 'room_member_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+      { accessor: 'userIdentity', name: 'room_member_user_identity_idx_btree', algorithm: 'btree', columns: [
+        'userIdentity',
+      ] },
+    ],
+    constraints: [
+      { name: 'room_member_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, RoomMemberRow),
+  scheduledMessage: __table({
+    name: 'scheduled_message',
+    indexes: [
+      { accessor: 'roomId', name: 'scheduled_message_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+      { accessor: 'scheduledId', name: 'scheduled_message_scheduled_id_idx_btree', algorithm: 'btree', columns: [
+        'scheduledId',
+      ] },
+      { accessor: 'sender', name: 'scheduled_message_sender_idx_btree', algorithm: 'btree', columns: [
+        'sender',
+      ] },
+    ],
+    constraints: [
+      { name: 'scheduled_message_scheduled_id_key', constraint: 'unique', columns: ['scheduledId'] },
+    ],
+  }, ScheduledMessageRow),
+  threadReply: __table({
+    name: 'thread_reply',
+    indexes: [
+      { accessor: 'id', name: 'thread_reply_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'parentMessageId', name: 'thread_reply_parent_message_id_idx_btree', algorithm: 'btree', columns: [
+        'parentMessageId',
+      ] },
+      { accessor: 'roomId', name: 'thread_reply_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+    ],
+    constraints: [
+      { name: 'thread_reply_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, ThreadReplyRow),
+  typingIndicator: __table({
+    name: 'typing_indicator',
+    indexes: [
+      { accessor: 'id', name: 'typing_indicator_id_idx_btree', algorithm: 'btree', columns: [
+        'id',
+      ] },
+      { accessor: 'roomId', name: 'typing_indicator_room_id_idx_btree', algorithm: 'btree', columns: [
+        'roomId',
+      ] },
+      { accessor: 'userIdentity', name: 'typing_indicator_user_identity_idx_btree', algorithm: 'btree', columns: [
+        'userIdentity',
+      ] },
+    ],
+    constraints: [
+      { name: 'typing_indicator_id_key', constraint: 'unique', columns: ['id'] },
+    ],
+  }, TypingIndicatorRow),
+  user: __table({
+    name: 'user',
+    indexes: [
+      { accessor: 'identity', name: 'user_identity_idx_btree', algorithm: 'btree', columns: [
+        'identity',
+      ] },
+    ],
+    constraints: [
+      { name: 'user_identity_key', constraint: 'unique', columns: ['identity'] },
+    ],
+  }, UserRow),
+});
+
+/** The schema information for all reducers in this module. This is defined the same way as the reducers would have been defined in the server, except the body of the reducer is omitted in code generation. */
+const reducersSchema = __reducers(
+  __reducerSchema("accept_invitation", AcceptInvitationReducer),
+  __reducerSchema("cancel_scheduled_message", CancelScheduledMessageReducer),
+  __reducerSchema("create_dm", CreateDmReducer),
+  __reducerSchema("create_room", CreateRoomReducer),
+  __reducerSchema("decline_invitation", DeclineInvitationReducer),
+  __reducerSchema("edit_message", EditMessageReducer),
+  __reducerSchema("invite_user", InviteUserReducer),
+  __reducerSchema("join_room", JoinRoomReducer),
+  __reducerSchema("kick_user", KickUserReducer),
+  __reducerSchema("leave_room", LeaveRoomReducer),
+  __reducerSchema("mark_read", MarkReadReducer),
+  __reducerSchema("promote_user", PromoteUserReducer),
+  __reducerSchema("register", RegisterReducer),
+  __reducerSchema("reply_to_message", ReplyToMessageReducer),
+  __reducerSchema("save_draft", SaveDraftReducer),
+  __reducerSchema("schedule_message", ScheduleMessageReducer),
+  __reducerSchema("send_ephemeral_message", SendEphemeralMessageReducer),
+  __reducerSchema("send_message", SendMessageReducer),
+  __reducerSchema("set_status", SetStatusReducer),
+  __reducerSchema("set_typing", SetTypingReducer),
+  __reducerSchema("toggle_reaction", ToggleReactionReducer),
+);
+
+/** The schema information for all procedures in this module. This is defined the same way as the procedures would have been defined in the server. */
+const proceduresSchema = __procedures(
+);
+
+/** The remote SpacetimeDB module schema, both runtime and type information. */
+const REMOTE_MODULE = {
+  versionInfo: {
+    cliVersion: "2.1.0" as const,
+  },
+  tables: tablesSchema.schemaType.tables,
+  reducers: reducersSchema.reducersType.reducers,
+  ...proceduresSchema,
+} satisfies __RemoteModule<
+  typeof tablesSchema.schemaType,
+  typeof reducersSchema.reducersType,
+  typeof proceduresSchema
+>;
+
+/** The tables available in this remote SpacetimeDB module. Each table reference doubles as a query builder. */
+export const tables: __QueryBuilder<typeof tablesSchema.schemaType> = __makeQueryBuilder(tablesSchema.schemaType);
+
+/** The reducers available in this remote SpacetimeDB module. */
+export const reducers = __convertToAccessorMap(reducersSchema.reducersType.reducers);
+
+/** The context type returned in callbacks for all possible events. */
+export type EventContext = __EventContextInterface<typeof REMOTE_MODULE>;
+/** The context type returned in callbacks for reducer events. */
+export type ReducerEventContext = __ReducerEventContextInterface<typeof REMOTE_MODULE>;
+/** The context type returned in callbacks for subscription events. */
+export type SubscriptionEventContext = __SubscriptionEventContextInterface<typeof REMOTE_MODULE>;
+/** The context type returned in callbacks for error events. */
+export type ErrorContext = __ErrorContextInterface<typeof REMOTE_MODULE>;
+/** The subscription handle type to manage active subscriptions created from a {@link SubscriptionBuilder}. */
+export type SubscriptionHandle = __SubscriptionHandleImpl<typeof REMOTE_MODULE>;
+
+/** Builder class to configure a new subscription to the remote SpacetimeDB instance. */
+export class SubscriptionBuilder extends __SubscriptionBuilderImpl<typeof REMOTE_MODULE> {}
+
+/** Builder class to configure a new database connection to the remote SpacetimeDB instance. */
+export class DbConnectionBuilder extends __DbConnectionBuilder<DbConnection> {}
+
+/** The typed database connection to manage connections to the remote SpacetimeDB instance. This class has type information specific to the generated module. */
+export class DbConnection extends __DbConnectionImpl<typeof REMOTE_MODULE> {
+  /** Creates a new {@link DbConnectionBuilder} to configure and connect to the remote SpacetimeDB instance. */
+  static builder = (): DbConnectionBuilder => {
+    return new DbConnectionBuilder(REMOTE_MODULE, (config: __DbConnectionConfig<typeof REMOTE_MODULE>) => new DbConnection(config));
+  };
+
+  /** Creates a new {@link SubscriptionBuilder} to configure a subscription to the remote SpacetimeDB instance. */
+  override subscriptionBuilder = (): SubscriptionBuilder => {
+    return new SubscriptionBuilder(this);
+  };
+}
+
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/invite_user_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/invite_user_reducer.ts
new file mode 100644
index 00000000000..2df312ce10c
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/invite_user_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as InviteUserReducer in types/reducers.ts; InviteUserParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  targetName: __t.string(), // NOTE(review): presumably the invitee's user name (a string, not an identity) — confirm against the module
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/join_room_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/join_room_reducer.ts
new file mode 100644
index 00000000000..80a9f7e20dd
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/join_room_reducer.ts
@@ -0,0 +1,15 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as JoinRoomReducer in types/reducers.ts; JoinRoomParams is inferred from it
+  roomId: __t.u64(), // u64 room id (the only argument)
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/kick_user_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/kick_user_reducer.ts
new file mode 100644
index 00000000000..acb136bee5e
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/kick_user_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as KickUserReducer in types/reducers.ts; KickUserParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  targetIdentityHex: __t.string(), // typed as a string, not __t.identity(); presumably a hex-encoded identity — confirm
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/leave_room_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/leave_room_reducer.ts
new file mode 100644
index 00000000000..80a9f7e20dd
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/leave_room_reducer.ts
@@ -0,0 +1,15 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as LeaveRoomReducer in types/reducers.ts; LeaveRoomParams is inferred from it
+  roomId: __t.u64(), // u64 room id (the only argument)
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/mark_read_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/mark_read_reducer.ts
new file mode 100644
index 00000000000..5fa3662f70a
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/mark_read_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as MarkReadReducer in types/reducers.ts; MarkReadParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  lastReadMessageId: __t.u64(), // u64 message id; same field name and type as the read_receipt row's lastReadMessageId
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_draft_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_draft_table.ts
new file mode 100644
index 00000000000..1dfd9117381
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_draft_table.ts
@@ -0,0 +1,18 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the MessageDraft object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  userIdentity: __t.identity().name("user_identity"), // explicit snake_case name; presumably the module-side column name — confirm
+  roomId: __t.u64().name("room_id"),
+  text: __t.string(), // no .name(...) call on this column
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_edit_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_edit_table.ts
new file mode 100644
index 00000000000..1c5b28c36e1
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_edit_table.ts
@@ -0,0 +1,20 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the MessageEdit object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  messageId: __t.u64().name("message_id"), // u64, same type as the message row's id
+  editedBy: __t.identity().name("edited_by"),
+  oldText: __t.string().name("old_text"), // both the previous and the new text are stored on the row
+  newText: __t.string().name("new_text"),
+  editedAt: __t.timestamp().name("edited_at"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_reaction_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_reaction_table.ts
new file mode 100644
index 00000000000..6cc349ade7b
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_reaction_table.ts
@@ -0,0 +1,19 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the MessageReaction object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  messageId: __t.u64().name("message_id"),
+  roomId: __t.u64().name("room_id"), // room id is stored on the reaction row alongside the message id
+  userIdentity: __t.identity().name("user_identity"),
+  emoji: __t.string(), // stored as a plain string; no .name(...) call on this column
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_table.ts
new file mode 100644
index 00000000000..f231fa81055
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/message_table.ts
@@ -0,0 +1,20 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the Message object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  roomId: __t.u64().name("room_id"),
+  sender: __t.identity(), // no .name(...) call on this column
+  text: __t.string(),
+  sentAt: __t.timestamp().name("sent_at"),
+  expiresAtMicros: __t.u64().name("expires_at_micros"), // u64 rather than __t.timestamp(); presumably microseconds per the name — units and the "never expires" encoding are not visible here
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/promote_user_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/promote_user_reducer.ts
new file mode 100644
index 00000000000..acb136bee5e
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/promote_user_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as PromoteUserReducer in types/reducers.ts; PromoteUserParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  targetIdentityHex: __t.string(), // typed as a string, not __t.identity(); presumably a hex-encoded identity — confirm
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/read_receipt_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/read_receipt_table.ts
new file mode 100644
index 00000000000..82a3955a8c8
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/read_receipt_table.ts
@@ -0,0 +1,18 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the ReadReceipt object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  roomId: __t.u64().name("room_id"),
+  userIdentity: __t.identity().name("user_identity"),
+  lastReadMessageId: __t.u64().name("last_read_message_id"), // u64, same type as the message row's id
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/register_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/register_reducer.ts
new file mode 100644
index 00000000000..ce493ee8574
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/register_reducer.ts
@@ -0,0 +1,15 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as RegisterReducer in types/reducers.ts; RegisterParams is inferred from it
+  name: __t.string(), // the only argument; any length or uniqueness rule is not expressed in this schema
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/reply_to_message_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/reply_to_message_reducer.ts
new file mode 100644
index 00000000000..a6071b80942
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/reply_to_message_reducer.ts
@@ -0,0 +1,17 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as ReplyToMessageReducer in types/reducers.ts; ReplyToMessageParams is inferred from it
+  parentMessageId: __t.u64(), // u64; same field name and type as the thread_reply row's parentMessageId
+  roomId: __t.u64(), // u64 room id
+  text: __t.string(),
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_admin_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_admin_table.ts
new file mode 100644
index 00000000000..023c08fc271
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_admin_table.ts
@@ -0,0 +1,17 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // NOTE(review): types.ts defines no RoomAdmin object and room_member carries isAdmin — possibly a stale binding; confirm before relying on it
+  id: __t.u64().primaryKey(), // u64 primary key
+  roomId: __t.u64().name("room_id"), // explicit snake_case name; presumably the module-side column name — confirm
+  userIdentity: __t.identity().name("user_identity"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_ban_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_ban_table.ts
new file mode 100644
index 00000000000..023c08fc271
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_ban_table.ts
@@ -0,0 +1,17 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the RoomBan object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  roomId: __t.u64().name("room_id"), // explicit snake_case name; presumably the module-side column name — confirm
+  userIdentity: __t.identity().name("user_identity"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_invitation_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_invitation_table.ts
new file mode 100644
index 00000000000..dd93cac0238
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_invitation_table.ts
@@ -0,0 +1,19 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the RoomInvitation object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  roomId: __t.u64().name("room_id"),
+  invitedBy: __t.identity().name("invited_by"), // both parties of the invitation are stored as identities
+  invitedUser: __t.identity().name("invited_user"),
+  createdAt: __t.timestamp().name("created_at"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_member_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_member_table.ts
new file mode 100644
index 00000000000..3a2f81f42cb
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_member_table.ts
@@ -0,0 +1,18 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the RoomMember object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  roomId: __t.u64().name("room_id"),
+  userIdentity: __t.identity().name("user_identity"),
+  isAdmin: __t.bool().name("is_admin"), // admin flag is stored on the membership row itself
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_table.ts
new file mode 100644
index 00000000000..526b9367165
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/room_table.ts
@@ -0,0 +1,20 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the Room object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  name: __t.string(), // no .name(...) call on this column
+  createdBy: __t.identity().name("created_by"),
+  createdAt: __t.timestamp().name("created_at"),
+  isPrivate: __t.bool().name("is_private"), // two independent boolean flags; how they interact is decided in the module, not here
+  isDm: __t.bool().name("is_dm"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/save_draft_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/save_draft_reducer.ts
new file mode 100644
index 00000000000..ee5c40c1d3d
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/save_draft_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as SaveDraftReducer in types/reducers.ts; SaveDraftParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  text: __t.string(), // no user identity argument; the message_draft row's userIdentity must come from elsewhere (presumably the caller) — confirm
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/schedule_message_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/schedule_message_reducer.ts
new file mode 100644
index 00000000000..7eeb03b9878
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/schedule_message_reducer.ts
@@ -0,0 +1,17 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as ScheduleMessageReducer in types/reducers.ts; ScheduleMessageParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  text: __t.string(),
+  sendAtMicros: __t.u64(), // plain u64 rather than __t.timestamp(); presumably a microsecond timestamp per the name — confirm epoch and units against the module
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/scheduled_message_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/scheduled_message_table.ts
new file mode 100644
index 00000000000..0c639bbac79
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/scheduled_message_table.ts
@@ -0,0 +1,19 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the ScheduledMessage object in types.ts
+  scheduledId: __t.u64().primaryKey().name("scheduled_id"), // u64 primary key, explicitly named "scheduled_id"
+  scheduledAt: __t.scheduleAt().name("scheduled_at"), // __t.scheduleAt() column; presumably what marks this as a scheduled table — confirm
+  roomId: __t.u64().name("room_id"),
+  sender: __t.identity(), // no .name(...) call on this column
+  text: __t.string(),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/send_ephemeral_message_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/send_ephemeral_message_reducer.ts
new file mode 100644
index 00000000000..4c4ff8afd82
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/send_ephemeral_message_reducer.ts
@@ -0,0 +1,17 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as SendEphemeralMessageReducer in types/reducers.ts; SendEphemeralMessageParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  text: __t.string(),
+  durationSeconds: __t.u32(), // u32; presumably the message lifetime in seconds per the name — confirm
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/send_message_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/send_message_reducer.ts
new file mode 100644
index 00000000000..ee5c40c1d3d
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/send_message_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as SendMessageReducer in types/reducers.ts; SendMessageParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  text: __t.string(), // no sender argument; the message row's sender must come from elsewhere (presumably the caller) — confirm
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/set_status_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/set_status_reducer.ts
new file mode 100644
index 00000000000..0b6c8c3c38a
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/set_status_reducer.ts
@@ -0,0 +1,15 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as SetStatusReducer in types/reducers.ts; SetStatusParams is inferred from it
+  status: __t.string(), // a plain string at the schema level; any allowed-value check is not visible here
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/set_typing_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/set_typing_reducer.ts
new file mode 100644
index 00000000000..98e0582d43e
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/set_typing_reducer.ts
@@ -0,0 +1,16 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as SetTypingReducer in types/reducers.ts; SetTypingParams is inferred from it
+  roomId: __t.u64(), // u64 room id
+  isTyping: __t.bool(), // boolean flag; the typing_indicator row has no such column, so how false is represented is not visible here
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/thread_reply_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/thread_reply_table.ts
new file mode 100644
index 00000000000..3003f7c02cf
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/thread_reply_table.ts
@@ -0,0 +1,20 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the ThreadReply object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  parentMessageId: __t.u64().name("parent_message_id"), // u64, same type as the message row's id
+  roomId: __t.u64().name("room_id"),
+  sender: __t.identity(), // no .name(...) call on this column
+  text: __t.string(),
+  sentAt: __t.timestamp().name("sent_at"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/toggle_reaction_reducer.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/toggle_reaction_reducer.ts
new file mode 100644
index 00000000000..5e83582f63f
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/toggle_reaction_reducer.ts
@@ -0,0 +1,17 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default { // arg schema imported as ToggleReactionReducer in types/reducers.ts; ToggleReactionParams is inferred from it
+  messageId: __t.u64(), // u64 message id
+  roomId: __t.u64(), // u64 room id
+  emoji: __t.string(), // plain string, same type as the message_reaction row's emoji column
+};
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types.ts
new file mode 100644
index 00000000000..6725285d5fa
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types.ts
@@ -0,0 +1,135 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export const Message = __t.object("Message", { // same fields, in the same order, as the row in message_table.ts
+  id: __t.u64(),
+  roomId: __t.u64(),
+  sender: __t.identity(),
+  text: __t.string(),
+  sentAt: __t.timestamp(),
+  expiresAtMicros: __t.u64(),
+});
+export type Message = __Infer<typeof Message>; // type alias reuses the const's identifier; TypeScript keeps value and type namespaces separate
+
+export const MessageDraft = __t.object("MessageDraft", { // same fields, in the same order, as the row in message_draft_table.ts
+  id: __t.u64(),
+  userIdentity: __t.identity(),
+  roomId: __t.u64(),
+  text: __t.string(),
+});
+export type MessageDraft = __Infer<typeof MessageDraft>;
+
+export const MessageEdit = __t.object("MessageEdit", { // same fields, in the same order, as the row in message_edit_table.ts
+  id: __t.u64(),
+  messageId: __t.u64(),
+  editedBy: __t.identity(),
+  oldText: __t.string(),
+  newText: __t.string(),
+  editedAt: __t.timestamp(),
+});
+export type MessageEdit = __Infer<typeof MessageEdit>;
+
+export const MessageExpiryTimer = __t.object("MessageExpiryTimer", { // NOTE(review): no message_expiry_timer_table.ts appears among the *_table.ts bindings — presumably a non-public table; confirm
+  scheduledId: __t.u64(),
+  scheduledAt: __t.scheduleAt(),
+  messageId: __t.u64(),
+});
+export type MessageExpiryTimer = __Infer<typeof MessageExpiryTimer>;
+
+export const MessageReaction = __t.object("MessageReaction", { // same fields, in the same order, as the row in message_reaction_table.ts
+  id: __t.u64(),
+  messageId: __t.u64(),
+  roomId: __t.u64(),
+  userIdentity: __t.identity(),
+  emoji: __t.string(),
+});
+export type MessageReaction = __Infer<typeof MessageReaction>;
+
+export const ReadReceipt = __t.object("ReadReceipt", { // same fields, in the same order, as the row in read_receipt_table.ts
+  id: __t.u64(),
+  roomId: __t.u64(),
+  userIdentity: __t.identity(),
+  lastReadMessageId: __t.u64(),
+});
+export type ReadReceipt = __Infer<typeof ReadReceipt>;
+
+export const Room = __t.object("Room", { // same fields, in the same order, as the row in room_table.ts
+  id: __t.u64(),
+  name: __t.string(),
+  createdBy: __t.identity(),
+  createdAt: __t.timestamp(),
+  isPrivate: __t.bool(),
+  isDm: __t.bool(),
+});
+export type Room = __Infer<typeof Room>;
+
+export const RoomBan = __t.object("RoomBan", { // same fields, in the same order, as the row in room_ban_table.ts
+  id: __t.u64(),
+  roomId: __t.u64(),
+  userIdentity: __t.identity(),
+});
+export type RoomBan = __Infer<typeof RoomBan>;
+
+export const RoomInvitation = __t.object("RoomInvitation", { // same fields, in the same order, as the row in room_invitation_table.ts
+  id: __t.u64(),
+  roomId: __t.u64(),
+  invitedBy: __t.identity(),
+  invitedUser: __t.identity(),
+  createdAt: __t.timestamp(),
+});
+export type RoomInvitation = __Infer<typeof RoomInvitation>;
+
+export const RoomMember = __t.object("RoomMember", { // same fields, in the same order, as the row in room_member_table.ts
+  id: __t.u64(),
+  roomId: __t.u64(),
+  userIdentity: __t.identity(),
+  isAdmin: __t.bool(),
+});
+export type RoomMember = __Infer<typeof RoomMember>;
+
+export const ScheduledMessage = __t.object("ScheduledMessage", { // same fields, in the same order, as the row in scheduled_message_table.ts
+  scheduledId: __t.u64(),
+  scheduledAt: __t.scheduleAt(),
+  roomId: __t.u64(),
+  sender: __t.identity(),
+  text: __t.string(),
+});
+export type ScheduledMessage = __Infer<typeof ScheduledMessage>;
+
+export const ThreadReply = __t.object("ThreadReply", { // same fields, in the same order, as the row in thread_reply_table.ts
+  id: __t.u64(),
+  parentMessageId: __t.u64(),
+  roomId: __t.u64(),
+  sender: __t.identity(),
+  text: __t.string(),
+  sentAt: __t.timestamp(),
+});
+export type ThreadReply = __Infer<typeof ThreadReply>;
+
+export const TypingIndicator = __t.object("TypingIndicator", { // same fields, in the same order, as the row in typing_indicator_table.ts
+  id: __t.u64(),
+  roomId: __t.u64(),
+  userIdentity: __t.identity(),
+  updatedAt: __t.timestamp(),
+});
+export type TypingIndicator = __Infer<typeof TypingIndicator>;
+
+export const User = __t.object("User", { // same fields, in the same order, as the row in user_table.ts
+  identity: __t.identity(),
+  name: __t.string(),
+  online: __t.bool(),
+  status: __t.string(),
+  lastActiveAt: __t.timestamp(),
+  isGuest: __t.bool(),
+});
+export type User = __Infer<typeof User>;
+
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types/procedures.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types/procedures.ts
new file mode 100644
index 00000000000..d5ac825c9ab
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types/procedures.ts
@@ -0,0 +1,10 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import { type Infer as __Infer } from "spacetimedb";
+
+// Import all procedure arg schemas
+
+
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types/reducers.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types/reducers.ts
new file mode 100644
index 00000000000..9ae45d84050
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/types/reducers.ts
@@ -0,0 +1,52 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import { type Infer as __Infer } from "spacetimedb";
+
+// Import all reducer arg schemas
+import AcceptInvitationReducer from "../accept_invitation_reducer";
+import CancelScheduledMessageReducer from "../cancel_scheduled_message_reducer";
+import CreateDmReducer from "../create_dm_reducer";
+import CreateRoomReducer from "../create_room_reducer";
+import DeclineInvitationReducer from "../decline_invitation_reducer";
+import EditMessageReducer from "../edit_message_reducer";
+import InviteUserReducer from "../invite_user_reducer";
+import JoinRoomReducer from "../join_room_reducer";
+import KickUserReducer from "../kick_user_reducer";
+import LeaveRoomReducer from "../leave_room_reducer";
+import MarkReadReducer from "../mark_read_reducer";
+import PromoteUserReducer from "../promote_user_reducer";
+import RegisterReducer from "../register_reducer";
+import ReplyToMessageReducer from "../reply_to_message_reducer";
+import SaveDraftReducer from "../save_draft_reducer";
+import ScheduleMessageReducer from "../schedule_message_reducer";
+import SendEphemeralMessageReducer from "../send_ephemeral_message_reducer";
+import SendMessageReducer from "../send_message_reducer";
+import SetStatusReducer from "../set_status_reducer";
+import SetTypingReducer from "../set_typing_reducer";
+import ToggleReactionReducer from "../toggle_reaction_reducer";
+
+export type AcceptInvitationParams = __Infer<typeof AcceptInvitationReducer>; // each *Params alias in this list is inferred from the arg schema imported above under the matching *Reducer name
+export type CancelScheduledMessageParams = __Infer<typeof CancelScheduledMessageReducer>;
+export type CreateDmParams = __Infer<typeof CreateDmReducer>;
+export type CreateRoomParams = __Infer<typeof CreateRoomReducer>;
+export type DeclineInvitationParams = __Infer<typeof DeclineInvitationReducer>;
+export type EditMessageParams = __Infer<typeof EditMessageReducer>;
+export type InviteUserParams = __Infer<typeof InviteUserReducer>;
+export type JoinRoomParams = __Infer<typeof JoinRoomReducer>;
+export type KickUserParams = __Infer<typeof KickUserReducer>;
+export type LeaveRoomParams = __Infer<typeof LeaveRoomReducer>;
+export type MarkReadParams = __Infer<typeof MarkReadReducer>;
+export type PromoteUserParams = __Infer<typeof PromoteUserReducer>;
+export type RegisterParams = __Infer<typeof RegisterReducer>;
+export type ReplyToMessageParams = __Infer<typeof ReplyToMessageReducer>;
+export type SaveDraftParams = __Infer<typeof SaveDraftReducer>;
+export type ScheduleMessageParams = __Infer<typeof ScheduleMessageReducer>;
+export type SendEphemeralMessageParams = __Infer<typeof SendEphemeralMessageReducer>;
+export type SendMessageParams = __Infer<typeof SendMessageReducer>;
+export type SetStatusParams = __Infer<typeof SetStatusReducer>;
+export type SetTypingParams = __Infer<typeof SetTypingReducer>;
+export type ToggleReactionParams = __Infer<typeof ToggleReactionReducer>;
+
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/typing_indicator_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/typing_indicator_table.ts
new file mode 100644
index 00000000000..a8b3fdc3090
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/typing_indicator_table.ts
@@ -0,0 +1,18 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({ // row schema; fields and order match the TypingIndicator object in types.ts
+  id: __t.u64().primaryKey(), // u64 primary key
+  roomId: __t.u64().name("room_id"),
+  userIdentity: __t.identity().name("user_identity"),
+  updatedAt: __t.timestamp().name("updated_at"), // only a timestamp is stored; NOTE(review): presumably consumers expire stale indicators from it — confirm
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/user_table.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/user_table.ts
new file mode 100644
index 00000000000..11c2d5c9626
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/module_bindings/user_table.ts
@@ -0,0 +1,20 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default __t.row({
+  identity: __t.identity().primaryKey(),
+  name: __t.string(),
+  online: __t.bool(),
+  status: __t.string(),
+  lastActiveAt: __t.timestamp().name("last_active_at"),
+  isGuest: __t.bool().name("is_guest"),
+});
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/realistic-chat.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/realistic-chat.ts
new file mode 100644
index 00000000000..cf98b37554c
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/realistic-chat.ts
@@ -0,0 +1,172 @@
+// Realistic chat scenario.
+//
+// Spawns M concurrent users, each sending 1 message every 5-15 seconds (jittered)
+// for `durationSec` seconds. Measures the same metrics as stress-throughput,
+// but under load that resembles real usage rather than worst-case flooding.
+//
+// This is the headroom test: can the app sustain a comfortable chat load
+// without latency tail blowing up?
+
+import { LatencyHistogram, parseStamp, stampMessage, nsToMs, type ScenarioResult } from '../metrics.ts';
+import {
+  type PgConfig,
+  createPgRoom,
+  createPgUser,
+  joinPgRoom,
+  connectPgClient,
+  pgSend,
+} from '../clients/postgres-client.ts';
+import {
+  type StdbConfig,
+  connectStdb,
+  stdbCreateRoom,
+  stdbFindRoomIdByName,
+  stdbJoinRoom,
+  stdbSendMessage,
+  stdbSetName,
+} from '../clients/spacetime-client.ts';
+
+export interface RealisticOpts {
+  users: number; // number of concurrent sender connections to spawn
+  durationSec: number; // length of the sending window, in seconds
+  minIntervalMs: number; // lower bound of the pause between a user's messages; default 5000
+  maxIntervalMs: number; // upper bound of the pause between a user's messages; default 15000
+}
+
+function jitter(min: number, max: number): number {
+  return min + (max - min) * Math.random(); // random offset into the [min, max) span, added to min
+}
+
+export async function runRealisticPostgres(cfg: PgConfig, opts: RealisticOpts): Promise<ScenarioResult> {
+  const tag = `pr${Date.now().toString(36)}`; // per-run prefix used in the user and room names
+  const users = await Promise.all(
+    Array.from({ length: opts.users }, (_, i) => createPgUser(cfg, `${tag}_u${i}`)),
+  );
+  const listenerUser = await createPgUser(cfg, `${tag}_listener`);
+  const room = await createPgRoom(cfg, tag, listenerUser.id);
+  await Promise.all(users.map((u) => joinPgRoom(cfg, room.id, u.id)));
+  // Scenario: opts.users clients each send one stamped message per jittered interval; a separate listener measures fan-out.
+  const fanout = new LatencyHistogram();
+  let received = 0;
+  let measuring = false;
+  // The listener ignores anything outside the measuring window and any message without a bench stamp.
+  const listener = await connectPgClient(cfg, listenerUser, room.id, (msg) => {
+    if (!measuring) return;
+    const stamp = parseStamp(msg.content);
+    if (!stamp) return;
+    received += 1;
+    fanout.record(nsToMs(process.hrtime.bigint() - stamp.sentNs));
+  });
+
+  const clients = await Promise.all(
+    users.map((u) => connectPgClient(cfg, u, room.id, () => { /* discard own echoes */ })),
+  );
+  // Measured window starts here: every user loops "send, then jittered sleep" until endTime.
+  measuring = true;
+  const startedAt = new Date().toISOString();
+  const endTime = Date.now() + opts.durationSec * 1000;
+  let seq = 1; // sequence number shared by all users
+  let sent = 0;
+
+  const userLoop = async (c: typeof clients[number]): Promise<void> => {
+    while (Date.now() < endTime) {
+      pgSend(c, room.id, stampMessage(seq++)); // not awaited: counted as sent immediately
+      sent += 1;
+      await new Promise((r) => setTimeout(r, jitter(opts.minIntervalMs, opts.maxIntervalMs)));
+    }
+  };
+  await Promise.all(clients.map(userLoop));
+  // Fixed 2 s grace period so late deliveries are still counted, then stop measuring.
+  await new Promise((r) => setTimeout(r, 2000));
+  measuring = false;
+
+  for (const c of clients) c.close();
+  listener.close();
+
+  return {
+    scenario: 'realistic-chat',
+    backend: 'postgres',
+    startedAt,
+    durationSec: opts.durationSec,
+    writers: opts.users,
+    sent,
+    received,
+    errors: 0, // send failures are not tracked in this function
+    msgsPerSec: received / opts.durationSec, // listener-observed rate over the nominal duration (grace period excluded)
+    ackLatencyMs: new LatencyHistogram().summary(), // empty histogram: ack latency is not measured here
+    fanoutLatencyMs: fanout.summary(),
+    notes: `${opts.users} users, jitter ${opts.minIntervalMs}-${opts.maxIntervalMs}ms`,
+  };
+}
+
+export async function runRealisticSpacetime(cfg: StdbConfig, opts: RealisticOpts): Promise<ScenarioResult> {
+  // Realistic-load run against SpacetimeDB: `opts.users` clients each send one stamped message
+  // per jittered interval while a separate listener connection records fan-out latency.
+  const tag = `sr${Date.now().toString(36)}`;
+  const fanout = new LatencyHistogram();
+  let received = 0;
+  let measuring = false;
+  const listener = await connectStdb(cfg, {
+    onMessage: (row) => {
+      if (!measuring) return; // ignore rows seen before/after the measured window
+      const stamp = parseStamp(row.text);
+      if (!stamp) return; // not a benchmark message
+      received += 1;
+      fanout.record(nsToMs(process.hrtime.bigint() - stamp.sentNs));
+    },
+  });
+  await stdbSetName(listener, `${tag}_l`);
+  await stdbCreateRoom(listener, tag);
+  let roomId: bigint | null = null;
+  for (let i = 0; i < 20 && roomId === null; i++) { // poll up to 20 times, 100 ms apart
+    roomId = stdbFindRoomIdByName(listener, tag);
+    if (roomId === null) await new Promise((r) => setTimeout(r, 100));
+  }
+  if (roomId === null) { listener.close(); throw new Error('failed to locate created room id'); }
+  // Users connect one after another; each sets its name and joins the bench room.
+  const clients: Awaited<ReturnType<typeof connectStdb>>[] = [];
+  for (let i = 0; i < opts.users; i++) {
+    const w = await connectStdb(cfg);
+    await stdbSetName(w, `${tag}_u${i}`);
+    await stdbJoinRoom(w, roomId);
+    clients.push(w);
+  }
+
+  measuring = true;
+  const startedAt = new Date().toISOString();
+  const endTime = Date.now() + opts.durationSec * 1000;
+  let seq = 1;
+  let sent = 0;
+  let errors = 0; // sends whose promise rejected; reported instead of being silently dropped
+  const userLoop = async (c: typeof clients[number]): Promise<void> => {
+    while (Date.now() < endTime) {
+      try {
+        await stdbSendMessage(c, roomId!, stampMessage(seq++));
+        sent += 1;
+      } catch { errors += 1; }
+      await new Promise((r) => setTimeout(r, jitter(opts.minIntervalMs, opts.maxIntervalMs)));
+    }
+  };
+  await Promise.all(clients.map(userLoop));
+  // Fixed 2 s grace period so late deliveries are still counted, then stop measuring.
+  await new Promise((r) => setTimeout(r, 2000));
+  measuring = false;
+
+  for (const c of clients) c.close();
+  listener.close();
+
+  return {
+    scenario: 'realistic-chat',
+    backend: 'spacetime',
+    startedAt,
+    durationSec: opts.durationSec,
+    writers: opts.users,
+    sent,
+    received,
+    errors,
+    msgsPerSec: received / opts.durationSec,
+    ackLatencyMs: new LatencyHistogram().summary(), // empty histogram: ack latency is not measured here
+    fanoutLatencyMs: fanout.summary(),
+    notes: `${opts.users} users, jitter ${opts.minIntervalMs}-${opts.maxIntervalMs}ms`,
+  };
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/stress-throughput.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/stress-throughput.ts
new file mode 100644
index 00000000000..5c8ee982003
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/stress-throughput.ts
@@ -0,0 +1,280 @@
+// Stress throughput scenario.
+//
+// Spawns N writer clients and has each fire send_message as fast as possible
+// for `durationSec` seconds. A separate listener client (subscribed to the same
+// room) measures fan-out latency by parsing a hrtime stamp embedded in the
+// message text.
+//
+// Reports:
+//   - sustained msgs/sec     (received-by-listener / duration)
+//   - ack latency p50/p99    (PG: writer's own echo round-trip; STDB: reducer Promise resolve)
+//   - fan-out latency p50/p99 (writer hrtime → listener observes row)
+
+import { LatencyHistogram, parseStamp, stampMessage, nsToMs, type ScenarioResult } from '../metrics.ts';
+import {
+  type PgConfig,
+  createPgRoom,
+  createPgUser,
+  joinPgRoom,
+  connectPgClient,
+  pgSend,
+  pgSendRest,
+} from '../clients/postgres-client.ts';
+import {
+  type StdbConfig,
+  connectStdb,
+  stdbCreateRoom,
+  stdbFindRoomIdByName,
+  stdbJoinRoom,
+  stdbSendMessage,
+  stdbSetName,
+} from '../clients/spacetime-client.ts';
+
+export interface StressOpts {
+  writers: number; // number of concurrent writer connections
+  durationSec: number; // length of the measured send window, in seconds
+}
+
+export async function runStressPostgres(cfg: PgConfig, opts: StressOpts): Promise<ScenarioResult> {
+  const tag = `ps${Date.now().toString(36)}`; // ~10 chars
+  // Stress run against the Postgres app: opts.writers clients send as fast as allowed for opts.durationSec.
+  // Create N writers + 1 listener; one room they all join.
+  const writerUsers = await Promise.all(
+    Array.from({ length: opts.writers }, (_, i) => createPgUser(cfg, `${tag}_w${i}`)),
+  );
+  const listenerUser = await createPgUser(cfg, `${tag}_listener`);
+  const room = await createPgRoom(cfg, tag, listenerUser.id);
+  await Promise.all(writerUsers.map((u) => joinPgRoom(cfg, room.id, u.id)));
+
+  const ack = new LatencyHistogram();
+  const fanout = new LatencyHistogram();
+  const inflight = new Map<number, bigint>(); // seq → hrtime at emit; filled in socket mode only
+  let received = 0;
+  let sent = 0;
+  let measuring = false;
+  let errors = 0; // REST sends that threw or came back with a falsy response
+  // Listener: counts received and computes fan-out latency
+  const listener = await connectPgClient(cfg, listenerUser, room.id, (msg) => {
+    if (!measuring) return;
+    const stamp = parseStamp(msg.content);
+    if (!stamp) return;
+    received += 1;
+    fanout.record(nsToMs(process.hrtime.bigint() - stamp.sentNs));
+  });
+
+  // Writers: each writer also subscribes (joined the room) and uses its own
+  // echoes as the "ack" — the moment the server has inserted the message and
+  // re-broadcast it back to me.
+  const writers = await Promise.all(
+    writerUsers.map((u) =>
+      connectPgClient(cfg, u, room.id, (msg) => {
+        if (!measuring) return;
+        if (msg.userId !== u.id) return;
+        const stamp = parseStamp(msg.content);
+        if (!stamp) return;
+        const start = inflight.get(stamp.seq);
+        if (start !== undefined) {
+          ack.record(nsToMs(process.hrtime.bigint() - start));
+          inflight.delete(stamp.seq);
+        }
+      }),
+    ),
+  );
+
+  // Brief warmup — also detects whether this PG app uses socket-based or
+  // REST-based message sending. Track warmup echoes separately since
+  // `received` only increments when `measuring` is true.
+  let warmupEchoes = 0;
+  const warmupHandler = () => { warmupEchoes += 1; };
+  listener.socket.on('message', warmupHandler);
+  listener.socket.on('new_message', warmupHandler);
+  for (let i = 0; i < writers.length; i++) {
+    pgSend(writers[i]!, room.id, `${'__bench:'}${process.hrtime.bigint()}:0:warmup`);
+  }
+  await new Promise((r) => setTimeout(r, 1500));
+  listener.socket.off('message', warmupHandler);
+  listener.socket.off('new_message', warmupHandler);
+  const useRest = warmupEchoes === 0; // socket warmup produced no echoes → REST mode
+  if (useRest) {
+    console.log('[pg] Socket send produced no echoes — switching to REST mode (POST /api/rooms/:id/messages)');
+  }
+
+  measuring = true;
+  const startedAt = new Date().toISOString();
+  const endTime = Date.now() + opts.durationSec * 1000;
+  let seq = 1;
+  // NOTE: socket-mode echoes that never arrive stay in `inflight`; MAX_INFLIGHT of them stall every writer until endTime.
+  const MAX_INFLIGHT = 200;
+  const writerLoop = async (w: typeof writers[number]): Promise<void> => {
+    if (useRest) {
+      // REST mode (20260403): POST per message, ack = HTTP response. Each writer awaits its own
+      // request, so `inflight` (socket mode only) stays empty and needs no back-pressure check here.
+      // NOTE(review): if the app also pushes these messages to the listener socket, `received` is counted twice — confirm.
+      while (Date.now() < endTime) {
+        const s = seq++;
+        const t0 = process.hrtime.bigint();
+        sent += 1;
+        try {
+          const resp = await pgSendRest(cfg, room.id, w.user.id, stampMessage(s));
+          if (resp) {
+            received += 1;
+            ack.record(nsToMs(process.hrtime.bigint() - t0));
+            fanout.record(nsToMs(process.hrtime.bigint() - t0));
+          } else {
+            errors += 1; // falsy response: presumably a rejected request — verify against pgSendRest
+          }
+        } catch { errors += 1; }
+      }
+    } else {
+      // Socket mode (20260406): fire-and-forget emit, ack = echo
+      while (Date.now() < endTime) {
+        while (inflight.size >= MAX_INFLIGHT && Date.now() < endTime) {
+          await new Promise((r) => setTimeout(r, 1));
+        }
+        if (Date.now() >= endTime) break;
+        const s = seq++;
+        inflight.set(s, process.hrtime.bigint());
+        pgSend(w, room.id, stampMessage(s));
+        sent += 1;
+        await new Promise((r) => setImmediate(r)); // yield so echo handlers can run
+      }
+    }
+  };
+  await Promise.all(writers.map(writerLoop));
+
+  // Drain in-flight echoes
+  await new Promise((r) => setTimeout(r, 3000));
+  measuring = false;
+
+  for (const w of writers) w.close();
+  listener.close();
+
+  return {
+    scenario: 'stress-throughput',
+    backend: 'postgres',
+    startedAt,
+    durationSec: opts.durationSec,
+    writers: opts.writers,
+    sent,
+    received,
+    errors,
+    msgsPerSec: received / opts.durationSec,
+    ackLatencyMs: ack.summary(),
+    fanoutLatencyMs: fanout.summary(),
+    notes: `${opts.writers} writers firing as fast as possible`,
+  };
+}
+
+export async function runStressSpacetime(cfg: StdbConfig, opts: StressOpts): Promise<ScenarioResult> {
+  // Write-throughput stress run against SpacetimeDB: each writer keeps a bounded pipeline of
+  // stdbSendMessage calls in flight for opts.durationSec; results come from the reducer promises.
+  const tag = `ss${Date.now().toString(36)}`;
+  const ack = new LatencyHistogram();
+  const fanout = new LatencyHistogram(); // stays empty: no listener is attached in this scenario
+  let errors = 0; // reducer calls whose promise rejected
+
+  // Seed connection: only subscribes to the room table, enough to create the
+  // bench room and look up its id. Avoids syncing the (potentially large)
+  // message table on every new connection.
+  const seed = await connectStdb(cfg, { subscriptions: ['SELECT * FROM room'] });
+  await stdbSetName(seed, `${tag}_s`);
+  await stdbCreateRoom(seed, tag);
+  let roomId: bigint | null = null;
+  for (let i = 0; i < 20 && roomId === null; i++) {
+    roomId = stdbFindRoomIdByName(seed, tag);
+    if (roomId === null) await new Promise((r) => setTimeout(r, 100));
+  }
+  if (roomId === null) { seed.close(); throw new Error('failed to locate created room id'); }
+
+  // Listener DISABLED for pure write-throughput measurement. With the listener
+  // subscribed, fan-out processing competes with writer ack handling on the
+  // same Node event loop, becoming the client-side bottleneck. We trust the
+  // reducer ack to measure successful commits. fanout histogram will be empty.
+  // Consequently there is no listener handle here; `received` is derived from acks at the end.
+
+  // Spawn writers. Writers don't need table subscriptions — ack latency comes
+  // from the reducer promise, not from observing echoes. Skipping the default
+  // subscription set avoids syncing ~70k historical message rows per writer.
+  const writers: Awaited<ReturnType<typeof connectStdb>>[] = [];
+  for (let i = 0; i < opts.writers; i++) {
+    const w = await connectStdb(cfg, { subscriptions: [] });
+    await stdbSetName(w, `${tag}_w${i}`);
+    await stdbJoinRoom(w, roomId);
+    writers.push(w);
+  }
+
+  // Warmup: each writer fires 5 messages
+  for (let i = 0; i < 5; i++) {
+    await Promise.all(writers.map((w) => stdbSendMessage(w, roomId!, `${'__bench:'}${process.hrtime.bigint()}:0:warmup`)));
+  }
+  // Tiny pause to let warmup drain
+  await new Promise((r) => setTimeout(r, 500));
+
+  const startedAt = new Date().toISOString();
+  const endTime = Date.now() + opts.durationSec * 1000;
+  let seq = 1;
+  let sent = 0;
+
+  // Each writer worker runs a pipelined loop — keeps up to MAX_INFLIGHT_PER_WORKER
+  // reducer calls in flight concurrently. Matches keynote-2 benchmark methodology.
+  // STDB handles many more in-flight calls than PG because it batches over WS.
+  const MAX_INFLIGHT_PER_WORKER = 10;
+  const writerLoop = async (w: typeof writers[number]): Promise<void> => {
+    const inflight = new Set<Promise<void>>();
+    const launchOp = () => {
+      const s = seq++;
+      const text = stampMessage(s);
+      const t0 = process.hrtime.bigint();
+      sent += 1;
+      const p = stdbSendMessage(w, roomId!, text).then(
+        () => {
+          if (Date.now() < endTime) { // acks landing after the window are left out of latency and throughput
+            ack.record(nsToMs(process.hrtime.bigint() - t0));
+          }
+        },
+        () => { errors += 1; },
+      );
+      inflight.add(p);
+      p.finally(() => { inflight.delete(p); });
+    };
+    while (Date.now() < endTime) {
+      if (inflight.size < MAX_INFLIGHT_PER_WORKER) {
+        launchOp();
+      } else {
+        await new Promise((r) => setImmediate(r)); // pipeline full: yield so pending acks can resolve
+      }
+    }
+    // Drain outstanding for up to 5s after end
+    const drainDeadline = Date.now() + 5000;
+    while (inflight.size > 0 && Date.now() < drainDeadline) {
+      await new Promise((r) => setTimeout(r, 10));
+    }
+  };
+  await Promise.all(writers.map(writerLoop));
+
+  // Drain
+  await new Promise((r) => setTimeout(r, 3000));
+
+  // Close every connection opened above, including the seed (it was previously left open).
+  for (const w of writers) w.close();
+  seed.close();
+
+  // With listener disabled, count "received" as acked reducer calls — we
+  // trust reducer acks as proof the row was committed.
+  const received = ack.count();
+
+  return {
+    scenario: 'stress-throughput',
+    backend: 'spacetime',
+    startedAt,
+    durationSec: opts.durationSec,
+    writers: opts.writers,
+    sent,
+    received,
+    errors,
+    msgsPerSec: received / opts.durationSec,
+    ackLatencyMs: ack.summary(),
+    fanoutLatencyMs: fanout.summary(),
+    notes: `${opts.writers} writers, up to ${MAX_INFLIGHT_PER_WORKER} in-flight sends each`,
+  };
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/tsconfig.json b/tools/llm-sequential-upgrade/perf-benchmark/tsconfig.json
new file mode 100644
index 00000000000..653e6ad8e1f
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "moduleResolution": "bundler",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "allowJs": true,
+    "noEmit": true,
+    "resolveJsonModule": true,
+    "allowImportingTsExtensions": true
+  },
+  "include": ["src/**/*"]
+}
diff --git a/tools/llm-sequential-upgrade/reset-app.sh b/tools/llm-sequential-upgrade/reset-app.sh
new file mode 100644
index 00000000000..f52df842379
--- /dev/null
+++ b/tools/llm-sequential-upgrade/reset-app.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Reset an app's backend state for a clean test run.
+# Publishes a fresh SpacetimeDB module or resets PostgreSQL tables.
+#
+# Usage:
+#   ./reset-app.sh <app-dir>
+#
+# This gives Playwright a clean slate — no leftover users, rooms, or messages.
+
+set -euo pipefail
+# First argument: path to the app directory (required; the script aborts with the usage text otherwise).
+APP_DIR="${1:?Usage: ./reset-app.sh <app-dir>}"
+
+if [[ ! -d "$APP_DIR" ]]; then
+  echo "ERROR: App directory not found: $APP_DIR"
+  exit 1
+fi
+
+# Ensure spacetime is in PATH: append the per-user AppData/Local/SpacetimeDB directory when it exists
+SPACETIME_DIR="${USERPROFILE:-$HOME}/AppData/Local/SpacetimeDB"
+if [[ -d "$SPACETIME_DIR" ]]; then
+  export PATH="$PATH:$SPACETIME_DIR"
+fi
+_USER="${USER:-${USERNAME:-$(whoami)}}"  # second attempt via the /c/Users/<name> form of the same location
+if [[ -d "/c/Users/$_USER/AppData/Local/SpacetimeDB" ]]; then
+  export PATH="$PATH:/c/Users/$_USER/AppData/Local/SpacetimeDB"
+fi
+
+# Auto-detect backend from the directory layout: backend/spacetimedb → spacetime, server → postgres
+if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+  BACKEND="spacetime"
+elif [[ -d "$APP_DIR/server" ]]; then
+  BACKEND="postgres"
+else
+  echo "ERROR: Cannot detect backend in $APP_DIR"
+  exit 1
+fi
+# Epoch-seconds suffix; used below to name the freshly published module.
+RESET_ID="test-$(date +%s)"
+
+if [[ "$BACKEND" == "spacetime" ]]; then
+  echo "Resetting SpacetimeDB module..."
+
+  # Generate a fresh module name
+  NEW_MODULE="chat-app-$RESET_ID"
+
+  # Publish fresh module (cygpath converts the path to native Windows form under msys/cygwin)
+  BACKEND_DIR="$APP_DIR/backend/spacetimedb"
+  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+    BACKEND_DIR_NATIVE=$(cygpath -w "$BACKEND_DIR")
+  else
+    BACKEND_DIR_NATIVE="$BACKEND_DIR"
+  fi
+  echo "  Publishing module: $NEW_MODULE"
+  spacetime publish -p "$BACKEND_DIR_NATIVE" -s local "$NEW_MODULE" 2>&1 | tail -3
+
+  # Update client config to point at new module
+  CONFIG_FILE="$APP_DIR/client/src/config.ts"
+  if [[ -f "$CONFIG_FILE" ]]; then
+    # `-i.bak` is accepted by both GNU and BSD/macOS sed (bare `-i` is not); the backup is removed right away.
+    sed -i.bak "s/MODULE_NAME = '.*'/MODULE_NAME = '$NEW_MODULE'/" "$CONFIG_FILE"
+    rm -f "$CONFIG_FILE.bak"
+    echo "  Updated config.ts: MODULE_NAME = '$NEW_MODULE'"
+  else
+    echo "  WARNING: config.ts not found at $CONFIG_FILE"
+  fi
+
+  echo "  Module reset complete. Vite will hot-reload."
+
+elif [[ "$BACKEND" == "postgres" ]]; then
+  echo "Resetting PostgreSQL database..."
+  # Container name (overridable via env) and fallback database name.
+  POSTGRES_CONTAINER="${POSTGRES_CONTAINER:-llm-sequential-upgrade-postgres-1}"
+  DB_NAME="spacetime"
+
+  # Look for DATABASE_URL in the server to find the actual database
+  SERVER_DIR="$APP_DIR/server"
+  if [[ -f "$SERVER_DIR/.env" ]]; then
+    # `|| true`: under `set -euo pipefail` a .env without DATABASE_URL would otherwise abort the script here. Quotes/CRs are stripped.
+    DB_URL=$(grep -E '^[[:space:]]*(export[[:space:]]+)?DATABASE_URL=' "$SERVER_DIR/.env" | head -1 | cut -d= -f2- | tr -d "\"'\r" || true)
+    PARSED_DB_NAME=$(echo "$DB_URL" | sed 's|.*/||; s|?.*||')
+    DB_NAME="${PARSED_DB_NAME:-$DB_NAME}"  # keep the default when nothing usable was parsed
+  fi
+
+  # Drop all tables and recreate via Drizzle push
+  echo "  Dropping all tables in $DB_NAME..."
+  docker exec "$POSTGRES_CONTAINER" psql -U spacetime -d "$DB_NAME" -c "
+    DO \$\$ DECLARE
+      r RECORD;
+    BEGIN
+      FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public') LOOP
+        EXECUTE 'DROP TABLE IF EXISTS ' || quote_ident(r.tablename) || ' CASCADE';
+      END LOOP;
+    END \$\$;
+  " 2>&1 | tail -1
+
+  # Re-push Drizzle schema (in a subshell, so this script's working directory is left untouched)
+  echo "  Pushing Drizzle schema..."
+  ( cd "$SERVER_DIR" && npx drizzle-kit push 2>&1 | tail -3 )
+
+  echo "  Database reset complete."
+fi
+
+echo "Reset complete for $BACKEND backend."
diff --git a/tools/llm-sequential-upgrade/run-loop.sh b/tools/llm-sequential-upgrade/run-loop.sh
new file mode 100644
index 00000000000..dc7176de711
--- /dev/null
+++ b/tools/llm-sequential-upgrade/run-loop.sh
@@ -0,0 +1,237 @@
+#!/bin/bash
+# Exhaust Loop — Full generate → grade → fix cycle for a single run.
+#
+# Drives one backend through the complete benchmark:
+#   1. Generate (or upgrade) the app
+#   2. Grade with Chrome MCP
+#   3. If bugs: fix and re-grade (repeat until pass or max iterations)
+#   4. For sequential: upgrade to next level, repeat from step 2
+#
+# Usage:
+#   ./run-loop.sh --backend spacetime --level 7 --rules standard --run-index 0
+#   ./run-loop.sh --backend postgres --variant one-shot --level 7 --run-index 1
+#   ./run-loop.sh --backend spacetime --variant sequential-upgrade --level 12 --run-index 0
+#
+# Grading uses Chrome MCP (interactive Claude Code session).
+# A lock file serializes grading across parallel runs.
+
+set -euo pipefail
+# Directory containing this script; sibling scripts (run.sh, grade.sh) are invoked relative to it.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# ─── Parse arguments ─────────────────────────────────────────────────────────
+# Defaults; each is overridden by the matching `--flag <value>` pair below.
+BACKEND="spacetime"
+VARIANT="one-shot"
+LEVEL=7
+RULES="guided"
+TEST_MODE=""
+RUN_INDEX=0
+MAX_FIX_ITERATIONS=5
+
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --backend) BACKEND="$2"; shift 2 ;;
+    --variant) VARIANT="$2"; shift 2 ;;
+    --level) LEVEL="$2"; shift 2 ;;
+    --rules) RULES="$2"; shift 2 ;;
+    --test) TEST_MODE="$2"; shift 2 ;;
+    --run-index) RUN_INDEX="$2"; shift 2 ;;
+    --max-fixes) MAX_FIX_ITERATIONS="$2"; shift 2 ;;
+    *) echo "Unknown option: $1"; exit 1 ;;
+  esac
+done
+# Pre-built `--test <mode>` pair; used unquoted at call sites so it expands to two words or to nothing.
+TEST_FLAG=""
+if [[ -n "$TEST_MODE" ]]; then
+  TEST_FLAG="--test $TEST_MODE"
+fi
+# LOCK_FILE is used as a directory (mkdir/rmdir) to serialize grading across runs.
+LOCK_FILE="$SCRIPT_DIR/.grade-lock"
+LOG_PREFIX="[run-$RUN_INDEX/$BACKEND]"
+
+echo "═══════════════════════════════════════════"
+echo "$LOG_PREFIX Exhaust Loop"
+echo "  Backend:   $BACKEND"
+echo "  Variant:   $VARIANT"
+echo "  Level:     $LEVEL"
+echo "  Rules:     $RULES"
+echo "  Run index: $RUN_INDEX"
+echo "  Max fixes: $MAX_FIX_ITERATIONS"
+echo "═══════════════════════════════════════════"
+
+# ─── Helper: acquire grading lock ────────────────────────────────────────────
+# Only one grading session at a time (Chrome MCP limitation).
+
+acquire_grade_lock() {
+  echo "$LOG_PREFIX Waiting for grading lock..."
+  while ! mkdir "$LOCK_FILE" 2>/dev/null; do sleep 5; done  # mkdir is atomic: it succeeds for exactly one run
+  GRADE_LOCK_HELD=1  # from here on this process owns the lock directory
+  echo "$LOG_PREFIX Grading lock acquired"
+}
+
+# Release only a lock this process owns. The EXIT trap also fires in runs that are still
+# waiting or not grading; an unconditional rmdir there would delete another run's lock.
+release_grade_lock() {
+  [[ "${GRADE_LOCK_HELD:-0}" == 1 ]] || return 0
+  GRADE_LOCK_HELD=0; rmdir "$LOCK_FILE" 2>/dev/null || true
+}
+trap 'release_grade_lock' EXIT  # clean up our own lock on exit
+
+# ─── Helper: grade the app ──────────────────────────────────────────────────
+
+grade_app() {
+  local app_dir="$1"      # app directory handed to grade.sh
+  local grade_level="$2"  # only used for log messages and the log file name
+  # Returns 0 when no BUG_REPORT.md exists in the app dir after grading, 1 otherwise.
+  acquire_grade_lock
+
+  echo "$LOG_PREFIX Grading at level $grade_level..."
+  "$SCRIPT_DIR/grade.sh" "$app_dir" 2>&1 | tee "$app_dir/grade-output-level${grade_level}.log"
+  # NOTE(review): grade.sh's exit status is not inspected, and callers run this inside if/while conditions (set -e suspended) — a crashed grader with no report reads as a pass.
+  release_grade_lock
+
+  # Check if bugs were found
+  if [[ -f "$app_dir/BUG_REPORT.md" ]]; then
+    echo "$LOG_PREFIX Bugs found — fix iteration needed"
+    return 1
+  else
+    echo "$LOG_PREFIX All features passed at level $grade_level"
+    return 0
+  fi
+}
+
+# ─── Helper: fix bugs ───────────────────────────────────────────────────────
+
+fix_bugs() {
+  local app_dir="$1"    # app directory passed to run.sh --fix
+  local iteration="$2"  # 1-based fix attempt number; used in the log message and log file name
+  # NOTE(review): unlike the other run.sh calls in this script, no --backend is passed; presumably run.sh infers it in --fix mode — confirm.
+  echo "$LOG_PREFIX Fix iteration $iteration..."
+  "$SCRIPT_DIR/run.sh" \
+    --fix "$app_dir" \
+    --variant "$VARIANT" \
+    --rules "$RULES" \
+    $TEST_FLAG \
+    --run-index "$RUN_INDEX" \
+    --level "$LEVEL" \
+    --resume-session \
+    2>&1 | tee "$app_dir/fix-output-iter${iteration}.log"
+}
+
+# ─── ONE-SHOT FLOW ──────────────────────────────────────────────────────────
+
+if [[ "$VARIANT" == "one-shot" ]]; then
+  echo "$LOG_PREFIX === One-Shot: Generating all features ==="
+
+  # Step 1: Generate
+  "$SCRIPT_DIR/run.sh" \
+    --variant "$VARIANT" \
+    --rules "$RULES" \
+    $TEST_FLAG \
+    --backend "$BACKEND" \
+    --run-index "$RUN_INDEX" \
+    --level "$LEVEL"
+
+  # Find the app directory (newest match). `|| true`: with pipefail, a no-match `ls` would abort before the error below.
+  APP_DIR=$(ls -dt "$SCRIPT_DIR/$VARIANT"/*"/$BACKEND/results"/chat-app-* 2>/dev/null | head -1 || true)
+  if [[ -z "$APP_DIR" || ! -d "$APP_DIR" ]]; then
+    echo "$LOG_PREFIX ERROR: Could not find generated app directory"
+    exit 1
+  fi
+  echo "$LOG_PREFIX App dir: $APP_DIR"
+
+  # Step 2: Grade → Fix loop (at most MAX_FIX_ITERATIONS fixes, each followed by a re-grade)
+  ITERATION=0
+  while true; do
+    if grade_app "$APP_DIR" "$LEVEL"; then
+      echo "$LOG_PREFIX === One-Shot Complete: All features pass ==="
+      break
+    fi
+
+    ITERATION=$((ITERATION + 1))
+    if [[ $ITERATION -gt $MAX_FIX_ITERATIONS ]]; then
+      echo "$LOG_PREFIX === Max fix iterations ($MAX_FIX_ITERATIONS) reached ==="
+      break
+    fi
+
+    fix_bugs "$APP_DIR" "$ITERATION"
+  done
+
+# ─── SEQUENTIAL-UPGRADE FLOW ────────────────────────────────────────────────
+
+else
+  echo "$LOG_PREFIX === Sequential Upgrade: Levels 1 → $LEVEL ==="
+  # Step 1: Generate level 1 ($TEST_FLAG is forwarded like in every other run.sh call)
+  echo "$LOG_PREFIX Generating level 1..."
+  "$SCRIPT_DIR/run.sh" \
+    --variant "$VARIANT" \
+    --rules "$RULES" \
+    $TEST_FLAG \
+    --backend "$BACKEND" \
+    --run-index "$RUN_INDEX" \
+    --level 1
+  # Newest matching directory; `|| true` keeps pipefail from aborting before the error message below.
+  APP_DIR=$(ls -dt "$SCRIPT_DIR/$VARIANT"/*"/$BACKEND/results"/chat-app-* 2>/dev/null | head -1 || true)
+  if [[ -z "$APP_DIR" || ! -d "$APP_DIR" ]]; then
+    echo "$LOG_PREFIX ERROR: Could not find generated app directory"
+    exit 1
+  fi
+  echo "$LOG_PREFIX App dir: $APP_DIR"
+
+  # Grade level 1
+  ITERATION=0
+  while ! grade_app "$APP_DIR" 1; do
+    ITERATION=$((ITERATION + 1))
+    if [[ $ITERATION -gt $MAX_FIX_ITERATIONS ]]; then
+      echo "$LOG_PREFIX Max fixes at level 1 — moving on"
+      break
+    fi
+    fix_bugs "$APP_DIR" "$ITERATION"
+  done
+
+  # Step 2: Upgrade through remaining levels
+  for current_level in $(seq 2 "$LEVEL"); do
+    PROMPT_EXISTS=$(ls "$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/composed/$(printf '%02d' "$current_level")_"*.md 2>/dev/null | head -1 || true)
+    if [[ -z "$PROMPT_EXISTS" ]]; then
+      echo "$LOG_PREFIX No prompt for level $current_level — stopping"
+      break
+    fi
+
+    echo "$LOG_PREFIX === Upgrading to level $current_level ==="
+    "$SCRIPT_DIR/run.sh" \
+      --variant "$VARIANT" \
+      --rules "$RULES" \
+      $TEST_FLAG \
+      --backend "$BACKEND" \
+      --run-index "$RUN_INDEX" \
+      --upgrade "$APP_DIR" \
+      --level "$current_level" \
+      --resume-session
+
+    # Grade ALL features (regression test)
+    ITERATION=0
+    while ! grade_app "$APP_DIR" "$current_level"; do
+      ITERATION=$((ITERATION + 1))
+      if [[ $ITERATION -gt $MAX_FIX_ITERATIONS ]]; then
+        echo "$LOG_PREFIX Max fixes at level $current_level — moving on"
+        break
+      fi
+      fix_bugs "$APP_DIR" "$ITERATION"
+    done
+  done
+
+  echo "$LOG_PREFIX === Sequential Upgrade Complete ==="
+fi
+
+# ─── Summary ────────────────────────────────────────────────────────────────
+
+echo ""
+echo "═══════════════════════════════════════════"
+echo "$LOG_PREFIX Exhaust Loop Complete"
+echo "  App dir: $APP_DIR"
+echo "  Variant: $VARIANT"
+echo "  Backend: $BACKEND"
+echo "═══════════════════════════════════════════"
+echo ""
+echo "When done grading, clean up with: ./cleanup.sh $APP_DIR"
diff --git a/tools/llm-sequential-upgrade/run.sh b/tools/llm-sequential-upgrade/run.sh
new file mode 100644
index 00000000000..02bc2b924fe
--- /dev/null
+++ b/tools/llm-sequential-upgrade/run.sh
@@ -0,0 +1,953 @@
+#!/bin/bash -l
+# Sequential Upgrade Launcher — Phase 1: Generate & Deploy
+#
+# Runs code generation and deployment in headless Claude Code with OTel tracking.
+# After this completes, run grade.sh to do browser testing and grading interactively.
+#
+# Usage:
+#   ./run.sh                                    # defaults: level=1, backend=spacetime, variant=sequential-upgrade
+#   ./run.sh --level 5 --backend postgres       # generate from scratch at level 5
+#   ./run.sh --variant one-shot --backend spacetime  # one-shot: all features in one prompt
+#   ./run.sh --rules standard --backend spacetime   # standard: SDK rules only, no templates
+#   ./run.sh --run-index 1 --backend spacetime      # parallel run with offset ports
+#   ./run.sh --fix <app-dir>                    # fix bugs in existing app (reads BUG_REPORT.md)
+#   ./run.sh --upgrade <app-dir> --level 3      # add level 3 features to existing level 2 app (incremental feature file)
+#   ./run.sh --upgrade <app-dir> --level 3 --composed-prompt  # use the full cumulative composed spec instead
+#   ./run.sh --upgrade <app-dir> --level 3 --resume-session   # same, but resume prior session for cache
+#
+# Prerequisites:
+#   - Claude Code CLI installed (claude or npx @anthropic-ai/claude-code)
+#   - Docker running (for OTel Collector)
+#   - SpacetimeDB running (spacetime start)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Configurable container name for PostgreSQL backend
+POSTGRES_CONTAINER="${POSTGRES_CONTAINER:-llm-sequential-upgrade-postgres-1}"
+
+# ─── Parse arguments ─────────────────────────────────────────────────────────
+
+LEVEL=1
+LEVEL_EXPLICIT=""
+BACKEND="spacetime"
+VARIANT="sequential-upgrade"
+RULES="guided"
+TEST_MODE=""  # playwright | chrome-mcp | (empty = no automated testing)
+RUN_INDEX=0
+FIX_MODE=""
+FIX_APP_DIR=""
+UPGRADE_MODE=""
+UPGRADE_APP_DIR=""
+RESUME_SESSION=""
+COMPOSED_UPGRADE_PROMPT=""
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --level) LEVEL="$2"; LEVEL_EXPLICIT=1; shift 2 ;;
+    --backend) BACKEND="$2"; shift 2 ;;
+    --variant) VARIANT="$2"; shift 2 ;;
+    --rules) RULES="$2"; shift 2 ;;
+    --test) TEST_MODE="$2"; shift 2 ;;
+    --run-index) RUN_INDEX="$2"; shift 2 ;;
+    --fix) FIX_MODE=1; FIX_APP_DIR="$2"; shift 2 ;;
+    --upgrade) UPGRADE_MODE=1; UPGRADE_APP_DIR="$2"; shift 2 ;;
+    --composed-prompt) COMPOSED_UPGRADE_PROMPT=1; shift ;;
+    --resume-session) RESUME_SESSION=1; shift ;;
+    *) echo "Unknown option: $1"; exit 1 ;;
+  esac
+done
+
+# Validate rules level
+case "$RULES" in
+  guided|standard|minimal) ;;
+  *) echo "ERROR: --rules must be guided, standard, or minimal"; exit 1 ;;
+esac
+
+# ─── Port allocation ──────────────────────────────────────────────────────────
+# Each backend has a 100-port range. Run-index offsets within that range.
+#   SpacetimeDB: 6173 + run-index  (6173, 6174, 6175, ...)
+#   PostgreSQL:  6273 + run-index  (6273, 6274, 6275, ...)
+#   Express:     6001 + run-index  (6001, 6002, 6003, ...)
+VITE_PORT_STDB=$((6173 + RUN_INDEX))
+VITE_PORT_PG=$((6273 + RUN_INDEX))
+EXPRESS_PORT=$((6001 + RUN_INDEX))
+PG_PORT=6432  # Shared container, isolation via per-run database names
+STDB_PORT=3000  # SpacetimeDB server is shared, modules are isolated by name
+
+if [[ "$BACKEND" == "spacetime" ]]; then
+  VITE_PORT=$VITE_PORT_STDB
+else
+  VITE_PORT=$VITE_PORT_PG
+fi
+
+# Variant-specific defaults
+if [[ "$VARIANT" == "one-shot" ]]; then
+  if [[ -z "$LEVEL_EXPLICIT" ]]; then
+    LEVEL=12  # one-shot defaults to all features
+  fi
+  if [[ -n "$UPGRADE_MODE" ]]; then
+    echo "WARNING: --upgrade is not meaningful with --variant one-shot"
+    echo "One-shot generates all features in a single session."
+    UPGRADE_MODE=""
+    UPGRADE_APP_DIR=""
+  fi
+fi
+
+# Determine mode label early (used in metadata and output)
+if [[ -n "$FIX_MODE" ]]; then
+  MODE_LABEL="fix"
+elif [[ -n "$UPGRADE_MODE" ]]; then
+  MODE_LABEL="upgrade"
+else
+  MODE_LABEL="generate"
+fi
+
+# ─── Find Claude CLI ─────────────────────────────────────────────────────────
+
+# Add Claude Code desktop install to PATH if not already findable
+_APPDATA_UNIX="${APPDATA:-$HOME/AppData/Roaming}"
+if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+  _APPDATA_UNIX=$(cygpath "$_APPDATA_UNIX" 2>/dev/null || echo "$_APPDATA_UNIX")
+fi
+CLAUDE_DESKTOP_DIR="$_APPDATA_UNIX/Claude/claude-code"
+if [[ -d "$CLAUDE_DESKTOP_DIR" ]]; then
+  CLAUDE_LATEST=$(ls -d "$CLAUDE_DESKTOP_DIR"/*/ 2>/dev/null | sort -V | tail -1)
+  if [[ -n "$CLAUDE_LATEST" ]]; then
+    export PATH="$PATH:$CLAUDE_LATEST"
+  fi
+fi
+
+CLAUDE_CMD=""
+if command -v claude &>/dev/null; then
+  CLAUDE_CMD="claude"
+elif command -v claude.exe &>/dev/null; then
+  CLAUDE_CMD="claude.exe"
+else
+  if command -v npx &>/dev/null; then
+    if npx @anthropic-ai/claude-code --version &>/dev/null; then
+      CLAUDE_CMD="npx @anthropic-ai/claude-code"
+    else
+      echo "ERROR: Claude Code CLI not found via npx."
+      echo "Install it with: npm install -g @anthropic-ai/claude-code"
+      exit 1
+    fi
+  else
+    echo "ERROR: Claude Code CLI not found (tried: claude, claude.exe, npx)."
+    echo "Install it with: npm install -g @anthropic-ai/claude-code"
+    exit 1
+  fi
+fi
+echo "Using Claude CLI: $CLAUDE_CMD"
+
+# ─── Pre-flight checks ──────────────────────────────────────────────────────
+
+echo ""
+echo "=== Pre-flight Checks ==="
+
+# Ensure spacetime is in PATH (Windows installs to AppData/Local/SpacetimeDB)
+SPACETIME_DIR="${USERPROFILE:-$HOME}/AppData/Local/SpacetimeDB"
+if [[ -d "$SPACETIME_DIR" ]]; then
+  export PATH="$PATH:$SPACETIME_DIR"
+fi
+# Also try the cygpath-resolved home
+_USER="${USER:-${USERNAME:-$(whoami)}}"
+if [[ -d "/c/Users/$_USER/AppData/Local/SpacetimeDB" ]]; then
+  export PATH="$PATH:/c/Users/$_USER/AppData/Local/SpacetimeDB"
+fi
+
+PG_DATABASE="spacetime"
+PG_CONNECTION_URL="postgresql://spacetime:spacetime@localhost:6432/spacetime"
+
+if [[ "$BACKEND" == "spacetime" ]]; then
+  if spacetime server ping local &>/dev/null; then
+    echo "[OK] SpacetimeDB is running"
+  else
+    echo "[FAIL] SpacetimeDB is not running. Start it with: spacetime start"
+    exit 1
+  fi
+elif [[ "$BACKEND" == "postgres" ]]; then
+  if docker exec "$POSTGRES_CONTAINER" psql -U spacetime -d spacetime -c "SELECT 1" &>/dev/null; then
+    echo "[OK] PostgreSQL container is running"
+  else
+    echo "[FAIL] PostgreSQL is not reachable. Check Docker container $POSTGRES_CONTAINER."
+    exit 1
+  fi
+
+  # Per-run database isolation: each run-index gets its own database
+  # Run 0 uses "spacetime" (default), Run N uses "spacetime_runN"
+  if [[ $RUN_INDEX -gt 0 ]]; then
+    PG_DATABASE="spacetime_run${RUN_INDEX}"
+    # Create the database if it doesn't exist. -tA makes psql print only the bare value, and a failed CREATE is reported.
+    DB_EXISTS=$(docker exec "$POSTGRES_CONTAINER" psql -U spacetime -d spacetime -tAc \
+      "SELECT 1 FROM pg_database WHERE datname = '$PG_DATABASE'")
+    [[ "$DB_EXISTS" == "1" ]] || docker exec "$POSTGRES_CONTAINER" psql -U spacetime -d spacetime -c \
+      "CREATE DATABASE $PG_DATABASE OWNER spacetime;" || { echo "[FAIL] Could not create PostgreSQL database $PG_DATABASE"; exit 1; }
+    echo "[OK] PostgreSQL database: $PG_DATABASE (run-index $RUN_INDEX)"
+  else
+    PG_DATABASE="spacetime"
+    echo "[OK] PostgreSQL database: $PG_DATABASE (default)"
+  fi
+  PG_CONNECTION_URL="postgresql://spacetime:spacetime@localhost:6432/$PG_DATABASE"
+fi
+
+if ! docker info &>/dev/null; then
+  echo "[FAIL] Docker is not running."
+  exit 1
+fi
+
+# Shared telemetry directory (OTel Collector writes here)
+SHARED_TELEMETRY_DIR="$SCRIPT_DIR/telemetry"
+mkdir -p "$SHARED_TELEMETRY_DIR"
+
+# Rotate telemetry log if over 10MB to prevent unbounded growth
+LOGS_FILE="$SHARED_TELEMETRY_DIR/logs.jsonl"
+if [[ -f "$LOGS_FILE" ]]; then
+  SIZE=$(wc -c < "$LOGS_FILE")
+  if [[ $SIZE -gt 10485760 ]]; then
+    ARCHIVE="$SHARED_TELEMETRY_DIR/logs-$(date +%Y%m%d-%H%M%S).jsonl.bak"
+    mv "$LOGS_FILE" "$ARCHIVE"
+    echo "[INFO] Rotated logs.jsonl ($SIZE bytes) to $(basename "$ARCHIVE")"
+  fi
+fi
+
+if docker compose -f "$SCRIPT_DIR/docker-compose.otel.yaml" ps --status running 2>/dev/null | grep -q otel-collector; then
+  echo "[OK] OTel Collector is running"
+else
+  echo "[...] Starting OTel Collector..."
+  docker compose -f "$SCRIPT_DIR/docker-compose.otel.yaml" up -d
+  echo "[OK] OTel Collector started"
+fi
+
+if command -v node &>/dev/null; then
+  echo "[OK] Node.js $(node --version)"
+else
+  echo "[FAIL] Node.js not found."
+  exit 1
+fi
+
+# Resolve the composed prompt for this level with a quoted array glob so that
+# a SCRIPT_DIR containing spaces is not word-split; an unmatched glob stays literal and fails -f.
+COMPOSED_PROMPT_MATCHES=("$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/composed/$(printf '%02d' "$LEVEL")_"*.md)
+if [[ -f "${COMPOSED_PROMPT_MATCHES[0]}" ]]; then
+  PROMPT_FILE="${COMPOSED_PROMPT_MATCHES[0]}"
+  echo "[OK] Prompt file: $(basename "$PROMPT_FILE")"
+else
+  echo "[FAIL] No composed prompt found for level $LEVEL"
+  exit 1
+fi
+
+# Strip UI contracts from prompt if not using Playwright testing
+if [[ "$TEST_MODE" != "playwright" ]]; then
+  STRIPPED_PROMPT="/tmp/seq-upgrade-prompt-${RUN_INDEX}-$(basename "$PROMPT_FILE")"
+  # Remove each **UI contract:** block (from that line through the next blank line) and the "Each feature below includes" note
+  sed '/^\*\*UI contract:\*\*/,/^$/d; /^\*\*Important:\*\* Each feature below includes/d' "$PROMPT_FILE" > "$STRIPPED_PROMPT"
+  PROMPT_FILE="$STRIPPED_PROMPT"
+  echo "[OK] UI contracts stripped (test=$TEST_MODE)"
+fi
+
+echo ""
+
+# ─── Create run directories ─────────────────────────────────────────────────
+
+TIMESTAMP=$(date +%Y%m%d-%H%M%S)
+DATE_STAMP=$(date +%Y%m%d)
+START_TIME=$(date +%Y-%m-%dT%H:%M:%S%z)
+START_TIME_UTC=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+
+# Variant-based directory structure:
+#   llm-sequential-upgrade/<variant>/<variant>-YYYYMMDD/    ← shared comparison run
+#     <backend>/                                   ← per-backend (spacetime|postgres)
+#       results/chat-app-<timestamp>/
+#       telemetry/<run-id>/
+#       inputs/
+VARIANT_DIR="$SCRIPT_DIR/$VARIANT"
+
+# For upgrade/fix, reuse the existing RUN_BASE_DIR from the app's parent structure.
+# For generate, create a new dated run directory.
+if [[ -n "$UPGRADE_MODE" || -n "$FIX_MODE" ]]; then
+  # Derive RUN_BASE_DIR from existing app directory structure:
+  #   <variant>/<variant>-DATE/<backend>/results/chat-app-*/
+  if [[ -n "$UPGRADE_MODE" ]]; then
+    APP_DIR="$UPGRADE_APP_DIR"
+  else
+    APP_DIR="$FIX_APP_DIR"
+  fi
+  # Detect backend from app directory structure BEFORE deriving paths.
+  # Must happen here so $BACKEND is correct for TELEMETRY_DIR assignment below.
+  if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+    BACKEND="spacetime"
+  elif [[ -d "$APP_DIR/server" ]]; then
+    BACKEND="postgres"
+  fi
+  # Walk up from app dir: chat-app-* → results → <backend> → <variant>-DATE
+  RUN_BASE_DIR="$(cd "$APP_DIR/../../.." 2>/dev/null && pwd)"
+  # Validate it looks like a run base dir (has a backend subdirectory)
+  if [[ ! -d "$RUN_BASE_DIR/$BACKEND" ]]; then
+    # Fallback: create new run base dir (legacy app dir not under variant structure)
+    RUN_BASE_DIR="$VARIANT_DIR/$VARIANT-$DATE_STAMP"
+  fi
+  TELEMETRY_DIR="$RUN_BASE_DIR/$BACKEND/telemetry"
+  RESULTS_DIR="$RUN_BASE_DIR/$BACKEND/results"
+else
+  # Generate mode: create/reuse a shared dated comparison run directory.
+  # Both backends (spacetime + postgres) share the same parent folder.
+  # Dedup only triggers if THIS backend already has a subdirectory
+  # (i.e. a second generate for the same backend on the same day).
+  RUN_BASE_DIR="$VARIANT_DIR/$VARIANT-$DATE_STAMP"
+  # Dedup helper: succeeds only if run base dir $1 has a COMPLETED run for $BACKEND (a telemetry/*/cost-summary.json).
+  _backend_has_completed_run() {
+    local summaries=("$1/$BACKEND/telemetry/"*/cost-summary.json)
+    [[ -e "${summaries[0]}" ]]  # an unmatched glob stays literal, so bare/abandoned stubs don't count
+  }
+  if _backend_has_completed_run "$RUN_BASE_DIR"; then
+    SEQ=2
+    while _backend_has_completed_run "$RUN_BASE_DIR-$SEQ"; do ((SEQ++)); done
+    RUN_BASE_DIR="$RUN_BASE_DIR-$SEQ"
+  fi
+  TELEMETRY_DIR="$RUN_BASE_DIR/$BACKEND/telemetry"
+  RESULTS_DIR="$RUN_BASE_DIR/$BACKEND/results"
+fi
+
+# Backend detection for fix/upgrade mode is done earlier (before TELEMETRY_DIR assignment).
+
+if [[ -n "$UPGRADE_MODE" ]]; then
+  RUN_ID="$BACKEND-upgrade-to-level$LEVEL-$TIMESTAMP"
+elif [[ -n "$FIX_MODE" ]]; then
+  RUN_ID="$BACKEND-fix-level$LEVEL-$TIMESTAMP"
+else
+  RUN_ID="$BACKEND-level$LEVEL-$TIMESTAMP"
+  APP_DIR="$RESULTS_DIR/chat-app-$TIMESTAMP"
+  mkdir -p "$APP_DIR"
+fi
+
+RUN_DIR="$TELEMETRY_DIR/$RUN_ID"
+mkdir -p "$RUN_DIR"
+
+# On Windows (Git Bash/MSYS2), convert paths to native format for Node.js
+if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+  RUN_DIR_NATIVE=$(cygpath -w "$RUN_DIR")
+  APP_DIR_NATIVE=$(cygpath -w "$APP_DIR")
+  SCRIPT_DIR_NATIVE=$(cygpath -w "$SCRIPT_DIR")
+else
+  RUN_DIR_NATIVE="$RUN_DIR"
+  APP_DIR_NATIVE="$APP_DIR"
+  SCRIPT_DIR_NATIVE="$SCRIPT_DIR"
+fi
+
+echo "=== Sequential Upgrade: ${MODE_LABEL^} ==="
+echo "  Variant:   $VARIANT"
+echo "  Rules:     $RULES"
+echo "  Level:     $LEVEL"
+echo "  Backend:   $BACKEND"
+echo "  Run index: $RUN_INDEX (Vite=$VITE_PORT)"
+echo "  Run ID:    $RUN_ID"
+echo "  Run base:  $RUN_BASE_DIR"
+echo "  App dir:   $APP_DIR_NATIVE"
+echo "  Telemetry: $RUN_DIR"
+echo ""
+
+# ─── Enable OpenTelemetry ────────────────────────────────────────────────────
+# Unset Claude Desktop host-management vars — they suppress OTEL telemetry when
+# run.sh is invoked from within a Claude Desktop agent session (Bash tool).
+unset CLAUDE_CODE_PROVIDER_MANAGED_BY_HOST
+unset CLAUDE_CODE_ENTRYPOINT
+
+export CLAUDE_CODE_ENABLE_TELEMETRY=1
+export OTEL_LOGS_EXPORTER=otlp
+export OTEL_METRICS_EXPORTER=otlp
+export OTEL_EXPORTER_OTLP_PROTOCOL=grpc
+export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
+export OTEL_LOGS_EXPORT_INTERVAL=1000
+export OTEL_METRIC_EXPORT_INTERVAL=5000
+
+# ─── Generate session ID ───────────────────────────────────────────────────
+# NOTE: OTEL_RESOURCE_ATTRIBUTES is set AFTER SESSION_ID is generated (below)
+# Pre-generate a UUID so we can pass --session-id to Claude and save it in
+# metadata for future --resume-session use. Both generators emit an RFC 4122 v4 UUID.
+
+SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())" 2>/dev/null || node -e "console.log(require('crypto').randomUUID())")
+
+# Tag all OTel records with run.id and session.id so parse-telemetry.mjs can
+# filter by session even when multiple backends run in parallel on the same collector.
+export OTEL_RESOURCE_ATTRIBUTES="run.id=$RUN_ID,session.id=$SESSION_ID"
+
+# ─── Save run metadata ──────────────────────────────────────────────────────
+
+# Escape backslashes for JSON (Windows paths have backslashes)
+APP_DIR_JSON="${APP_DIR_NATIVE//\\/\\\\}"
+
+cat > "$RUN_DIR/metadata.json" <<EOF
+{
+  "level": $LEVEL,
+  "backend": "$BACKEND",
+  "timestamp": "$TIMESTAMP",
+  "startedAt": "$START_TIME",
+  "startedAtUtc": "$START_TIME_UTC",
+  "runId": "$RUN_ID",
+  "appDir": "$APP_DIR_JSON",
+  "promptFile": "$(basename "$PROMPT_FILE")",
+  "phase": "$MODE_LABEL",
+  "variant": "$VARIANT",
+  "rules": "$RULES",
+  "testMode": "${TEST_MODE:-none}",
+  "runIndex": $RUN_INDEX,
+  "vitePort": $VITE_PORT,
+  "expressPort": $EXPRESS_PORT,
+  "pgDatabase": "${PG_DATABASE:-}",
+  "sessionId": "$SESSION_ID"
+}
+EOF
+
+# ─── Snapshot inputs ───────────────────────────────────────────────────────
+# Copy the inputs (prompts, backend specs, tooling, test plans) into
+# $RUN_BASE_DIR/$BACKEND/inputs. Does nothing when that directory already exists.
+
+snapshot_inputs() {
+  local INPUTS_DIR="$RUN_BASE_DIR/$BACKEND/inputs"
+  if [[ -d "$INPUTS_DIR" ]]; then
+    return  # already snapshotted (upgrade/fix into existing run)
+  fi
+  mkdir -p "$INPUTS_DIR/backends" "$INPUTS_DIR/test-plans" \
+           "$INPUTS_DIR/prompts/composed" "$INPUTS_DIR/prompts/language"
+
+  # Shared tooling; every copy in this function is best-effort (errors are silenced and ignored)
+  for f in CLAUDE.md run.sh grade.sh parse-telemetry.mjs \
+           docker-compose.otel.yaml otel-collector-config.yaml \
+           DEVELOP.md .gitignore; do
+    cp "$SCRIPT_DIR/$f" "$INPUTS_DIR/" 2>/dev/null || true
+  done
+
+  # Backend spec for the selected backend only; spacetime also gets its SDK rules and templates
+  cp "$SCRIPT_DIR/backends/$BACKEND.md" "$INPUTS_DIR/backends/" 2>/dev/null || true
+  if [[ "$BACKEND" == "spacetime" ]]; then
+    cp "$SCRIPT_DIR/backends/spacetime-sdk-rules.md" "$INPUTS_DIR/backends/" 2>/dev/null || true
+    cp "$SCRIPT_DIR/backends/spacetime-templates.md" "$INPUTS_DIR/backends/" 2>/dev/null || true
+  fi
+
+  # Test plans (all markdown files)
+  cp "$SCRIPT_DIR/test-plans/"*.md "$INPUTS_DIR/test-plans/" 2>/dev/null || true
+
+  # Prompts: every composed level, but only the language file matching $BACKEND
+  local PROMPTS_SRC="$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts"
+  cp "$PROMPTS_SRC/composed/"*.md "$INPUTS_DIR/prompts/composed/" 2>/dev/null || true
+  cp "$PROMPTS_SRC/language/typescript-$BACKEND.md" "$INPUTS_DIR/prompts/language/" 2>/dev/null || true
+
+  echo "  Inputs snapshotted to $INPUTS_DIR"
+}
+
+snapshot_inputs
+
+# Write app-dir.txt so benchmark.sh can find the app directory without racing
+echo "$APP_DIR" > "$RUN_DIR/app-dir.txt"
+
+# ─── Build the prompt ────────────────────────────────────────────────────────
+
+if [[ -n "$FIX_MODE" ]]; then
+  # ─── FIX MODE: Read bug report, fix code, redeploy ──────────────────────
+
+  # In fix mode, APP_DIR is the existing app dir
+  APP_DIR="$FIX_APP_DIR"
+  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+    APP_DIR_NATIVE=$(cygpath -w "$APP_DIR")
+  else
+    APP_DIR_NATIVE="$APP_DIR"
+  fi
+
+  if [[ ! -f "$APP_DIR/BUG_REPORT.md" ]]; then
+    echo "ERROR: No BUG_REPORT.md found in $APP_DIR"
+    echo "Run the grading session first to produce a bug report."
+    exit 1
+  fi
+
+  echo "=== Sequential Upgrade: Fix Iteration ==="
+  echo "  App dir: $APP_DIR_NATIVE"
+  echo "  Bug report: $APP_DIR_NATIVE/BUG_REPORT.md"
+  echo ""
+
+  # Detect backend from existing app directory structure
+  if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+    FIX_BACKEND="spacetime"
+  elif [[ -d "$APP_DIR/server" ]]; then
+    FIX_BACKEND="postgres"
+  else
+    FIX_BACKEND="unknown"
+  fi
+
+  PROMPT=$(cat <<PROMPT_EOF
+Fix the bugs in the sequential upgrade app.
+
+**App directory:** $APP_DIR_NATIVE
+**Backend:** $FIX_BACKEND
+
+**Instructions:**
+1. Read the CLAUDE.md in this directory for backend-specific architecture and deploy instructions
+2. Read BUG_REPORT.md in the app directory — it describes what's broken
+3. Read the relevant source code files mentioned in the bug report
+4. Fix each bug described in the report
+5. Rebuild and redeploy ALL servers:
+   - For PostgreSQL: restart the Express server (npm run dev in server/) AND the Vite client
+   - For SpacetimeDB: run spacetime publish, then restart the Vite client
+6. Verify the fix by testing the endpoint/behavior described in the bug report
+7. Make sure ALL servers are running:
+   - Client dev server on port $VITE_PORT
+   - For PostgreSQL: Express API server on port $EXPRESS_PORT (test with curl)
+8. Append this fix iteration to ITERATION_LOG.md in the app directory
+
+CRITICAL: After fixing code, you MUST verify the servers are running and the bug is fixed.
+Do NOT just edit files and say "done" — actually restart the servers and test.
+
+Do NOT do browser testing — that happens in the grading session.
+Cost tracking is automatic via OpenTelemetry — do NOT estimate tokens.
+
+When done, output: FIX_COMPLETE
+PROMPT_EOF
+  )
+
+elif [[ -n "$UPGRADE_MODE" ]]; then
+  # ─── UPGRADE MODE: Add new features from a higher level prompt ─────────
+
+  APP_DIR="$UPGRADE_APP_DIR"
+  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+    APP_DIR_NATIVE=$(cygpath -w "$APP_DIR")
+  else
+    APP_DIR_NATIVE="$APP_DIR"
+  fi
+
+  # ─── Snapshot previous level before upgrading ─────────────────────────
+  PREV_LEVEL=$((LEVEL - 1))
+  SNAPSHOT_DIR="$APP_DIR/level-$PREV_LEVEL"
+  if [[ -d "$SNAPSHOT_DIR" ]]; then
+    echo "Snapshot level-$PREV_LEVEL already exists — skipping snapshot"
+  else
+    echo "Snapshotting current app state to level-$PREV_LEVEL..."
+    mkdir -p "$SNAPSHOT_DIR"
+    # Copy app source dirs (exclude node_modules, dist, snapshots)
+    for item in "$APP_DIR"/*; do
+      base=$(basename "$item")
+      case "$base" in
+        level-*|node_modules|dist|.vite|drizzle|dev-server.log) continue ;;
+        *) cp -r "$item" "$SNAPSHOT_DIR/" 2>/dev/null ;;
+      esac
+    done
+    echo "  Saved to $SNAPSHOT_DIR"
+  fi
+
+  # Detect backend from existing app directory structure
+  if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+    UPGRADE_BACKEND="spacetime"
+  elif [[ -d "$APP_DIR/server" ]]; then
+    UPGRADE_BACKEND="postgres"
+  else
+    UPGRADE_BACKEND="unknown"
+  fi
+
+  # Resolve prompt file path
+  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+    PROMPT_FILE_NATIVE=$(cygpath -w "$PROMPT_FILE")
+    LANG_PROMPT_NATIVE=$(cygpath -w "$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/language/typescript-$UPGRADE_BACKEND.md")
+  else
+    PROMPT_FILE_NATIVE="$PROMPT_FILE"
+    LANG_PROMPT_NATIVE="$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/language/typescript-$UPGRADE_BACKEND.md"
+  fi
+
+  PREV_LEVEL=$((LEVEL - 1))
+
+  echo "=== Sequential Upgrade: Upgrade to Level $LEVEL ==="
+  echo "  App dir: $APP_DIR_NATIVE"
+  echo "  Backend: $UPGRADE_BACKEND"
+  echo "  From level: $PREV_LEVEL → $LEVEL"
+  echo "  Prompt: $(basename "$PROMPT_FILE")"
+  echo ""
+
+  # In upgrade mode, default to the incremental feature file (only the new
+  # feature). Pass --composed-prompt to use the full cumulative composed spec
+  # for this level, matching how the original L1-L11 benchmark was prompted.
+  if [[ -n "$COMPOSED_UPGRADE_PROMPT" ]]; then
+    FEATURE_FILE="$PROMPT_FILE"
+    echo "  Using composed (cumulative) feature file: $(basename "$FEATURE_FILE")"
+  else
+    FEATURE_PROMPT="$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/features/$(printf '%02d' "$LEVEL")_"*".md"
+    # shellcheck disable=SC2086
+    FEATURE_FILE=$(ls $FEATURE_PROMPT 2>/dev/null | head -1)
+    if [[ -n "$FEATURE_FILE" ]]; then
+      echo "  Using incremental feature file: $(basename "$FEATURE_FILE")"
+    else
+      echo "  WARNING: No incremental feature file for level $LEVEL, falling back to composed prompt"
+      FEATURE_FILE="$PROMPT_FILE"
+    fi
+  fi
+
+  # Read language and feature files to inline into the prompt
+  LANG_CONTENT=$(cat "$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/language/typescript-$UPGRADE_BACKEND.md" 2>/dev/null || echo "")
+  FEATURE_CONTENT=$(cat "$FEATURE_FILE" 2>/dev/null || echo "")
+
+  PROMPT=$(cat <<PROMPT_EOF
+Upgrade the existing chat app to add the new feature(s) from level $LEVEL.
+
+**App directory:** $APP_DIR_NATIVE
+**Backend:** $UPGRADE_BACKEND
+**Current level:** $PREV_LEVEL (all features from level $PREV_LEVEL are already implemented and working)
+**Target level:** $LEVEL
+
+**Instructions:**
+1. Read the CLAUDE.md in this directory for backend-specific architecture and SDK reference
+2. Read the existing source code to understand the current architecture
+3. Add the new feature(s) to both backend and frontend, integrating with the existing code
+4. Rebuild and redeploy (see CLAUDE.md for backend-specific steps)
+5. Verify the build succeeds: npx tsc --noEmit && npm run build (if applicable)
+6. Make sure the dev server is running on port $VITE_PORT
+
+Features from level $PREV_LEVEL and below are ALREADY IMPLEMENTED — do NOT rewrite them.
+Only add the NEW feature(s) that appear in the feature spec below but not in level $PREV_LEVEL.
+
+Do NOT do browser testing — that happens in a separate grading session.
+Cost tracking is automatic via OpenTelemetry — do NOT estimate tokens.
+
+When done, output: UPGRADE_COMPLETE
+
+---
+
+$LANG_CONTENT
+
+---
+
+$FEATURE_CONTENT
+PROMPT_EOF
+  )
+
+else
+  # ─── GENERATE MODE: Initial code generation and deploy ──────────────────
+
+  # Resolve absolute paths for prompt references
+  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+    PROMPT_FILE_NATIVE=$(cygpath -w "$PROMPT_FILE")
+    LANG_PROMPT_NATIVE=$(cygpath -w "$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/language/typescript-$BACKEND.md")
+  else
+    PROMPT_FILE_NATIVE="$PROMPT_FILE"
+    LANG_PROMPT_NATIVE="$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/language/typescript-$BACKEND.md"
+  fi
+
+  # Read language and feature files to inline into the prompt
+  LANG_CONTENT=$(cat "$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/language/typescript-$BACKEND.md" 2>/dev/null || echo "")
+  FEATURE_CONTENT=$(cat "$PROMPT_FILE" 2>/dev/null || echo "")
+
+  PROMPT=$(cat <<PROMPT_EOF
+Run the sequential upgrade benchmark — GENERATE AND DEPLOY ONLY.
+
+**Configuration:**
+- Level: $LEVEL
+- Backend: $BACKEND
+- App output directory: $APP_DIR_NATIVE (this is also your working directory)
+- Run ID: $RUN_ID
+
+**Instructions:**
+1. Read the CLAUDE.md in this directory — it has backend-specific setup, architecture, and SDK reference
+2. Follow the phases in CLAUDE.md to generate, build, and deploy the app
+3. Write all code in the current directory
+
+If the build fails, fix and retry (up to 3 times per phase).
+Write an ITERATION_LOG.md tracking any build reprompts.
+
+Do NOT do browser testing — that happens in a separate grading session.
+Cost tracking is automatic via OpenTelemetry — do NOT estimate tokens.
+
+When done, output: DEPLOY_COMPLETE
+
+---
+
+$LANG_CONTENT
+
+---
+
+$FEATURE_CONTENT
+PROMPT_EOF
+  )
+fi
+
+echo "Starting Claude Code session ($MODE_LABEL)..."
+echo "─────────────────────────────────────────────"
+
+# ─── Assemble backend-specific CLAUDE.md into app directory ─────────────────
+# Build CLAUDE.md at runtime by concatenating the workflow, SDK rules, and
+# templates. This ensures Claude always gets the latest rules inlined directly
+# (no "go find and read this other file" that it might skip).
+
+if [[ -z "$FIX_MODE" && -z "$UPGRADE_MODE" ]]; then
+  # Assemble CLAUDE.md based on --rules level:
+  #   guided:   full phases + SDK rules + code templates (most prescriptive)
+  #   standard: SDK rules only (no templates, no step-by-step phases)
+  #   minimal:  just the tech stack name (least prescriptive)
+  if [[ "$RULES" == "minimal" ]]; then
+    if [[ "$BACKEND" == "spacetime" ]]; then
+      echo "Build this app using the SpacetimeDB TypeScript SDK (npm package: spacetimedb)." > "$APP_DIR/CLAUDE.md"
+      echo "Server module in backend/spacetimedb/, React client in client/." >> "$APP_DIR/CLAUDE.md"
+      echo "Vite dev server port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
+    else
+      echo "Build this app using PostgreSQL + Express + Socket.io + Drizzle ORM." > "$APP_DIR/CLAUDE.md"
+      echo "Express server in server/, React client in client/." >> "$APP_DIR/CLAUDE.md"
+      echo "PostgreSQL connection: $PG_CONNECTION_URL" >> "$APP_DIR/CLAUDE.md"
+      echo "Express port: $EXPRESS_PORT | Vite port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
+    fi
+    echo "Assembled minimal CLAUDE.md (rules=$RULES)"
+  elif [[ "$RULES" == "standard" ]]; then
+    if [[ "$BACKEND" == "spacetime" ]]; then
+      cat "$SCRIPT_DIR/backends/spacetime-sdk-rules.md" > "$APP_DIR/CLAUDE.md"
+    else
+      echo "# PostgreSQL Backend" > "$APP_DIR/CLAUDE.md"
+      echo "" >> "$APP_DIR/CLAUDE.md"
+      echo "PostgreSQL connection: \`$PG_CONNECTION_URL\`" >> "$APP_DIR/CLAUDE.md"
+      echo "" >> "$APP_DIR/CLAUDE.md"
+      echo "Use Express (port $EXPRESS_PORT) + Socket.io + Drizzle ORM. Server in \`server/\`, client in \`client/\`." >> "$APP_DIR/CLAUDE.md"
+      echo "Vite dev server port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
+    fi
+    echo "Assembled standard CLAUDE.md (rules=$RULES)"
+  else
+    # guided (default) — full phases + SDK rules + templates
+    if [[ "$BACKEND" == "spacetime" ]]; then
+      {
+        cat "$SCRIPT_DIR/backends/spacetime.md"
+        echo ""
+        echo "---"
+        echo ""
+        cat "$SCRIPT_DIR/backends/spacetime-sdk-rules.md"
+        echo ""
+        echo "---"
+        echo ""
+        cat "$SCRIPT_DIR/backends/spacetime-templates.md"
+      } > "$APP_DIR/CLAUDE.md"
+      echo "Assembled guided CLAUDE.md from spacetime.md + sdk-rules + templates"
+    else
+      cp "$SCRIPT_DIR/backends/$BACKEND.md" "$APP_DIR/CLAUDE.md"
+      echo "Copied backends/$BACKEND.md → app CLAUDE.md"
+    fi
+  fi
+
+  # Prepend unique run ID to bust Anthropic's server-side prompt cache.
+  # Cache is keyed on content — a unique prefix guarantees a cold run every time.
+  sed -i "1s|^|<!-- run-id: $RUN_ID -->\n\n|" "$APP_DIR/CLAUDE.md"
+
+  # Patch ports and database names in CLAUDE.md for parallel runs (run-index > 0).
+  # The database rewrite is a single expression that matches only the bare default
+  # name, so a URL that already ends in spacetime_runN is never suffixed twice.
+  if [[ $RUN_INDEX -gt 0 ]]; then
+    sed -i -E \
+      -e "s/6173/$VITE_PORT_STDB/g" \
+      -e "s/6273/$VITE_PORT_PG/g" \
+      -e "s/:6001/:$EXPRESS_PORT/g" \
+      -e "s#(localhost:6432/)spacetime([^_[:alnum:]]|\$)#\1$PG_DATABASE\2#g" \
+      "$APP_DIR/CLAUDE.md"
+    echo "  Patched for run-index=$RUN_INDEX (Vite=$VITE_PORT, Express=$EXPRESS_PORT, DB=$PG_DATABASE)"
+  fi
+fi
+
+# ─── Run Claude Code ─────────────────────────────────────────────────────────
+# Run from the APP directory so CLAUDE.md auto-discovery picks up the
+# backend-specific file, not the parent llm-sequential-upgrade/CLAUDE.md.
+
+cd "$APP_DIR"
+
+# NOTE: Git isolation disabled — it breaks --resume-session because Claude Code
+# ties sessions to the project root (.git location). Without isolation, Claude
+# may see parent repo files, but session continuity is more important for
+# sequential upgrades. Use cleanup.sh after testing to remove any artifacts.
+
+# Build resume flag if --resume-session was passed and a prior session ID exists
+RESUME_FLAG=""
+if [[ -n "$RESUME_SESSION" && -n "$UPGRADE_MODE" ]]; then
+  # Find the most recent PRIOR run of this app (any metadata.json under a
+  # telemetry/ directory of this variant) and take its session ID.
+  # Recency comes from the metadata "timestamp" (YYYYMMDD-HHMMSS compares correctly
+  # as a string); directory names do not sort chronologically ("level9" > "level10").
+  # This run's own metadata.json is already on disk, so the new SESSION_ID is excluded.
+  PREV_SESSION_ID=""
+  PREV_STAMP=""
+  while IFS= read -r meta; do
+    META_PATH="$(cygpath -w "$meta" 2>/dev/null || echo "$meta")"
+    # Emits "<timestamp> <sessionId>" only when the recorded appDir matches this app; empty otherwise.
+    INFO=$(node -e "const m=JSON.parse(require('fs').readFileSync(process.argv[1],'utf-8')); if((m.appDir===process.argv[2]||m.appDir===process.argv[3])&&m.sessionId&&m.timestamp) process.stdout.write(m.timestamp+' '+m.sessionId)" -- "$META_PATH" "$APP_DIR_NATIVE" "$APP_DIR_JSON" 2>/dev/null || true)
+    STAMP="${INFO%% *}"
+    SID="${INFO#* }"
+    if [[ -n "$INFO" && "$SID" != "$SESSION_ID" && "$STAMP" > "$PREV_STAMP" ]]; then
+      PREV_STAMP="$STAMP"
+      PREV_SESSION_ID="$SID"
+    fi
+  done < <(find "$VARIANT_DIR" -path "*/telemetry/*" -name "metadata.json" 2>/dev/null)
+  if [[ -n "$PREV_SESSION_ID" ]]; then
+    RESUME_FLAG="--resume $PREV_SESSION_ID --fork-session"
+    echo "Forking prior session: $PREV_SESSION_ID"
+  else
+    echo "No prior session ID found for this app — starting fresh"
+  fi
+fi
+
+# --fork-session branches a new session from the prior one (keeps context); "|| EXIT_CODE=$?" stops set -e aborting before metadata/telemetry are finalized
+EXIT_CODE=0
+$CLAUDE_CMD --print --verbose --output-format text --dangerously-skip-permissions \
+  --add-dir "$APP_DIR" \
+  --add-dir "$SCRIPT_DIR" \
+  --add-dir "$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts" \
+  --session-id "$SESSION_ID" $RESUME_FLAG -p "$PROMPT" || EXIT_CODE=$?
+
+echo ""
+echo "─────────────────────────────────────────────"
+
+# ─── Record end time ─────────────────────────────────────────────────────────
+
+END_TIME=$(date +%Y-%m-%dT%H:%M:%S%z)
+END_TIME_UTC=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+
+# Update metadata with end time — use native path for Node.js on Windows
+METADATA_FILE_NATIVE="$RUN_DIR_NATIVE/metadata.json"
+if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+  METADATA_FILE_NATIVE=$(cygpath -w "$RUN_DIR/metadata.json")
+fi
+# Merge endedAt, endedAtUtc, exitCode, mode and sessionId into the existing metadata.json.
+# The values travel as argv instead of being spliced into the JS source, so a quote or
+# backslash in one of them (e.g. MODE_LABEL) can neither break nor inject into the script.
+node -e '
+const fs = require("fs");
+const [f, endedAt, endedAtUtc, exitCode, mode, sessionId] = process.argv.slice(1);
+const m = JSON.parse(fs.readFileSync(f, "utf-8"));
+Object.assign(m, { endedAt, endedAtUtc, exitCode: Number(exitCode), mode, sessionId });
+fs.writeFileSync(f, JSON.stringify(m, null, 2));
+' -- "$METADATA_FILE_NATIVE" "$END_TIME" "$END_TIME_UTC" "$EXIT_CODE" "$MODE_LABEL" "$SESSION_ID" \
+  || echo "WARNING: Failed to update metadata with end time"
+
+# ─── Snapshot completed level (upgrade mode) ─────────────────────────────────
+# After a successful upgrade run, copy the app's top-level entries into level-$LEVEL once (skipped if that dir exists).
+if [[ -n "$UPGRADE_MODE" && $EXIT_CODE -eq 0 ]]; then
+  LEVEL_SNAPSHOT="$APP_DIR/level-$LEVEL"
+  if [[ ! -d "$LEVEL_SNAPSHOT" ]]; then
+    echo "Snapshotting upgraded app state to level-$LEVEL..."
+    mkdir -p "$LEVEL_SNAPSHOT"
+    for item in "$APP_DIR"/*; do  # NOTE(review): unless dotglob is set, * skips dotfiles, so the .vite case below never matches
+      base=${item##*/}  # entry name without its directory part
+      case "$base" in
+        level-*|node_modules|dist|.vite|drizzle|dev-server.log) continue ;;  # excluded from the snapshot
+        *) cp -r "$item" "$LEVEL_SNAPSHOT/" 2>/dev/null || echo "  WARNING: could not copy $base into snapshot" ;;  # best-effort, but no longer silent
+      esac
+    done
+    echo "  Saved to $LEVEL_SNAPSHOT"
+  fi
+fi
+
+# ─── Parse telemetry ─────────────────────────────────────────────────────────
+
+echo ""
+echo "=== $MODE_LABEL Complete ==="
+echo "  Started: $START_TIME"
+echo "  Ended:   $END_TIME"
+echo ""
+
+# Resolve shared logs file path for telemetry parser
+LOGS_FILE_NATIVE="$SHARED_TELEMETRY_DIR/logs.jsonl"
+if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+  LOGS_FILE_NATIVE=$(cygpath -w "$SHARED_TELEMETRY_DIR/logs.jsonl")
+fi
+
+echo "Parsing telemetry..."
+if node "$SCRIPT_DIR_NATIVE/parse-telemetry.mjs" "$RUN_DIR_NATIVE" "--logs-file=$LOGS_FILE_NATIVE" "--extract-raw"; then
+  echo ""
+  echo "=== Results ==="
+  echo "  App:        $APP_DIR_NATIVE"
+  echo "  Cost:       $RUN_DIR/COST_REPORT.md"
+  echo ""
+  if [[ -n "$FIX_MODE" ]]; then
+    echo "=== Next Step: Re-grade the app ==="
+    echo "  In Claude Code, say:"
+    echo "    Re-grade the app at $APP_DIR_NATIVE"
+    echo ""
+  elif [[ -n "$UPGRADE_MODE" ]]; then
+    echo "=== Next Step: Grade the upgraded app (level $LEVEL) ==="
+    echo "  In Claude Code, say:"
+    echo "    Grade the app at $APP_DIR_NATIVE at level $LEVEL"
+    echo ""
+    NEXT_LEVEL=$((LEVEL + 1))
+    NEXT_PROMPT="$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts/composed/$(printf '%02d' "$NEXT_LEVEL")_"*".md"
+    if ls $NEXT_PROMPT &>/dev/null 2>&1; then
+      echo "  To continue upgrading after grading:"
+      echo "    ./run.sh --upgrade $APP_DIR --level $NEXT_LEVEL"
+      echo ""
+    fi
+  else
+    echo "=== Next Step: Grade the app ==="
+    echo "  In Claude Code, say:"
+    echo "    Grade the app at $APP_DIR_NATIVE"
+    echo ""
+  fi
+else
+  echo "WARNING: Telemetry parsing failed. Raw logs at: $SHARED_TELEMETRY_DIR/logs.jsonl"
+fi
+
+# ─── Auto-grade with Playwright (if installed) ──────────────────────────────
+# Runs only after a successful generation; TEST_MODE picks local Playwright specs or the agents grader.
+PLAYWRIGHT_DIR="$SCRIPT_DIR/test-plans/playwright"
+if [[ $EXIT_CODE -eq 0 && "$TEST_MODE" == "playwright" && -f "$PLAYWRIGHT_DIR/node_modules/.bin/playwright" ]]; then
+  echo ""
+  echo "=== Auto-grading with Playwright ==="
+  echo "  App URL: http://localhost:$VITE_PORT"
+
+  # Wait for dev server to be ready: poll once per second, up to 30 attempts, for an HTTP 200
+  READY=0
+  for i in $(seq 1 30); do
+    if curl -s -o /dev/null -w "%{http_code}" "http://localhost:$VITE_PORT" 2>/dev/null | grep -q "200"; then
+      READY=1
+      break
+    fi
+    sleep 1
+  done
+
+  if [[ $READY -eq 1 ]]; then
+    # Reset backend state for a clean test (fresh module or DB)
+    echo "Resetting backend state for clean test..."
+    "$SCRIPT_DIR/reset-app.sh" "$APP_DIR" || echo "WARNING: Backend reset failed — tests may use stale state"
+
+    # Wait for the app to reconnect after reset
+    sleep 3
+
+    # Determine which feature specs to run based on prompt level
+    # Level → max feature number mapping:
+    #   1=4, 2=5, 3=6, 4=7, 5=8, 6=9, 7=10, 8=11, 9=12, 10=13, 11=14, 12=15,
+    #   13=16, 14=17, 15=18, 16=19, 17=20, 18=21, 19=22
+    MAX_FEATURE=$((LEVEL + 3))
+    if [[ $MAX_FEATURE -gt 22 ]]; then MAX_FEATURE=22; fi
+
+    PW_SPEC_FILES=()  # array, so spec paths containing spaces stay single arguments
+    for feat_num in $(seq 1 "$MAX_FEATURE"); do
+      FEAT_PAD=$(printf '%02d' "$feat_num")
+      SPEC_FILE=$(ls "$PLAYWRIGHT_DIR/specs/feature-${FEAT_PAD}-"*.spec.ts 2>/dev/null | head -1)  # first match, if any
+      if [[ -n "$SPEC_FILE" ]]; then
+        PW_SPEC_FILES+=("$SPEC_FILE")
+      fi
+    done
+    echo "  Testing features 1-$MAX_FEATURE ($LEVEL prompt level)"
+
+    mkdir -p "/tmp/pw-results-$RUN_INDEX"
+    cd "$PLAYWRIGHT_DIR"
+    APP_URL="http://localhost:$VITE_PORT" npx playwright test ${PW_SPEC_FILES[@]+"${PW_SPEC_FILES[@]}"} --reporter=json \
+      1>"/tmp/pw-results-$RUN_INDEX/results.json" 2>/dev/null || true  # a failing test run must not abort; the JSON is parsed below
+    cd "$APP_DIR"
+
+    RESULTS_SIZE=$(wc -c < "/tmp/pw-results-$RUN_INDEX/results.json" 2>/dev/null || echo "0")
+    if [[ "$RESULTS_SIZE" -gt 100 ]]; then  # anything smaller is treated as "no report produced"
+      PW_RESULTS="/tmp/pw-results-$RUN_INDEX/results.json"
+      if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
+        PW_RESULTS=$(cygpath -w "$PW_RESULTS")
+      fi
+      node "$SCRIPT_DIR_NATIVE/parse-playwright-results.mjs" "$PW_RESULTS" "$APP_DIR_NATIVE" "$BACKEND"
+      # Copy raw results into telemetry dir for archival
+      cp "/tmp/pw-results-$RUN_INDEX/results.json" "$RUN_DIR/playwright-results.json" 2>/dev/null || true
+    else
+      echo "WARNING: Playwright produced no results (app may not have loaded)"
+    fi
+  else
+    echo "WARNING: Dev server not responding on port $VITE_PORT — skipping Playwright grading"
+  fi
+elif [[ $EXIT_CODE -eq 0 && "$TEST_MODE" == "agents" ]]; then
+  echo ""
+  echo "=== Auto-grading with Playwright Agents ==="
+  "$SCRIPT_DIR/grade-agents.sh" "$APP_DIR" 2>&1 || echo "WARNING: Agent grading failed"
+elif [[ $EXIT_CODE -ne 0 ]]; then
+  echo "Skipping auto-grade — code generation failed (exit $EXIT_CODE)"
+fi
+