From 083bc81874b0f26b7cbcbba05f677fca2ce2755c Mon Sep 17 00:00:00 2001
From: Filip Michalsky <filip@paywithsoap.com>
Date: Fri, 7 Nov 2025 13:34:31 -0500
Subject: [PATCH 1/7] speed up ci: cache pnpm, skip evals for docs-only changes, raise test workers

---
 .github/workflows/ci.yml                      | 163 +++++++++++++-----
 .../lib/v3/tests/v3.bb.playwright.config.ts   |   4 +-
 .../v3/tests/v3.local.playwright.config.ts    |   5 +-
 .../core/lib/v3/tests/v3.playwright.config.ts |   5 +-
 4 files changed, 128 insertions(+), 49 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d8bbe6657..bcb2e90ac 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,6 +26,7 @@ jobs:
     outputs:
       core: ${{ steps.filter.outputs.core }}
       evals: ${{ steps.filter.outputs.evals }}
+      docs-only: ${{ steps.filter.outputs.docs-only }}
     steps:
       - name: Check out repository code
         uses: actions/checkout@v4
@@ -44,10 +45,16 @@ jobs:
               - 'packages/evals/**'
               - 'package.json'
               - 'pnpm-lock.yaml'
+            docs-only:
+              - '**/*.md'
+              - 'examples/**'
+              - '!packages/**/*.md'
 
   determine-evals:
+    needs: [determine-changes]
     runs-on: ubuntu-latest
     outputs:
+      skip-all-evals: ${{ steps.check-labels.outputs.skip-all-evals }}
       run-combination: ${{ steps.check-labels.outputs.run-combination }}
       run-extract: ${{ steps.check-labels.outputs.run-extract }}
       run-act: ${{ steps.check-labels.outputs.run-act }}
@@ -57,9 +64,36 @@ jobs:
     steps:
       - id: check-labels
         run: |
+          # Check if skip-evals label is present
+          if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-evals') }}" == "true" ]]; then
+            echo "skip-evals label found - skipping all evals"
+            echo "skip-all-evals=true" >> $GITHUB_OUTPUT
+            echo "run-combination=false" >> $GITHUB_OUTPUT
+            echo "run-extract=false" >> $GITHUB_OUTPUT
+            echo "run-act=false" >> $GITHUB_OUTPUT
+            echo "run-observe=false" >> $GITHUB_OUTPUT
+            echo "run-targeted-extract=false" >> $GITHUB_OUTPUT
+            echo "run-agent=false" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          # Skip evals if only docs/examples changed (and not on main)
+          if [[ "${{ needs.determine-changes.outputs.docs-only }}" == "true" && "${{ needs.determine-changes.outputs.core }}" == "false" && "${{ needs.determine-changes.outputs.evals }}" == "false" && "${{ github.ref }}" != "refs/heads/main" ]]; then
+            echo "Only docs/examples changed - skipping evals"
+            echo "skip-all-evals=true" >> $GITHUB_OUTPUT
+            echo "run-combination=false" >> $GITHUB_OUTPUT
+            echo "run-extract=false" >> $GITHUB_OUTPUT
+            echo "run-act=false" >> $GITHUB_OUTPUT
+            echo "run-observe=false" >> $GITHUB_OUTPUT
+            echo "run-targeted-extract=false" >> $GITHUB_OUTPUT
+            echo "run-agent=false" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
           # Default to running all tests on main branch
           if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
             echo "Running all tests for main branch"
+            echo "skip-all-evals=false" >> $GITHUB_OUTPUT
             echo "run-combination=true" >> $GITHUB_OUTPUT
             echo "run-extract=true" >> $GITHUB_OUTPUT
             echo "run-act=true" >> $GITHUB_OUTPUT
@@ -70,6 +104,7 @@ jobs:
           fi
 
           # Check for specific labels
+          echo "skip-all-evals=false" >> $GITHUB_OUTPUT
           echo "run-combination=${{ contains(github.event.pull_request.labels.*.name, 'combination') }}" >> $GITHUB_OUTPUT
           echo "run-extract=${{ contains(github.event.pull_request.labels.*.name, 'extract') }}" >> $GITHUB_OUTPUT
           echo "run-act=${{ contains(github.event.pull_request.labels.*.name, 'act') }}" >> $GITHUB_OUTPUT
@@ -85,16 +120,19 @@ jobs:
       - name: Check out repository code
         uses: actions/checkout@v4
 
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Run Lint
         run: pnpm run lint
@@ -107,16 +145,19 @@ jobs:
       - name: Check out repository code
         uses: actions/checkout@v4
 
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Run Build
         run: pnpm run build
@@ -131,16 +172,19 @@ jobs:
       - name: Check out repository code
         uses: actions/checkout@v4
 
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         run: pnpm run build
@@ -166,16 +210,19 @@ jobs:
       - name: Check out repository code
         uses: actions/checkout@v4
 
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         run: pnpm run build
@@ -186,6 +233,7 @@ jobs:
   run-regression-evals:
     needs:
       [run-e2e-bb-tests, run-e2e-local-tests, determine-evals]
+    if: needs.determine-evals.outputs.skip-all-evals != 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 9
     outputs:
@@ -203,16 +251,19 @@ jobs:
       - name: Check out repository code
         uses: actions/checkout@v4
 
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         run: pnpm run build
@@ -263,18 +314,22 @@ jobs:
             echo "has_label=true" >> $GITHUB_OUTPUT
           fi
 
+      - name: Setup pnpm
+        if: needs.determine-evals.outputs.run-combination == 'true'
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         if: needs.determine-evals.outputs.run-combination == 'true'
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
         if: needs.determine-evals.outputs.run-combination == 'true'
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         if: needs.determine-evals.outputs.run-combination == 'true'
@@ -325,18 +380,22 @@ jobs:
             echo "has_label=true" >> $GITHUB_OUTPUT
           fi
 
+      - name: Setup pnpm
+        if: needs.determine-evals.outputs.run-act == 'true'
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         if: needs.determine-evals.outputs.run-act == 'true'
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
         if: needs.determine-evals.outputs.run-act == 'true'
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         if: needs.determine-evals.outputs.run-act == 'true'
@@ -390,18 +449,22 @@ jobs:
             echo "has_label=true" >> $GITHUB_OUTPUT
           fi
 
+      - name: Setup pnpm
+        if: needs.determine-evals.outputs.run-extract == 'true'
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         if: needs.determine-evals.outputs.run-extract == 'true'
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
         if: needs.determine-evals.outputs.run-extract == 'true'
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         if: needs.determine-evals.outputs.run-extract == 'true'
@@ -458,18 +521,22 @@ jobs:
             echo "has_label=true" >> $GITHUB_OUTPUT
           fi
 
+      - name: Setup pnpm
+        if: needs.determine-evals.outputs.run-observe == 'true'
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         if: needs.determine-evals.outputs.run-observe == 'true'
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
         if: needs.determine-evals.outputs.run-observe == 'true'
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         if: needs.determine-evals.outputs.run-observe == 'true'
@@ -523,18 +590,22 @@ jobs:
             echo "has_label=true" >> $GITHUB_OUTPUT
           fi
 
+      - name: Setup pnpm
+        if: needs.determine-evals.outputs.run-targeted-extract == 'true'
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         if: needs.determine-evals.outputs.run-targeted-extract == 'true'
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
         if: needs.determine-evals.outputs.run-targeted-extract == 'true'
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         if: needs.determine-evals.outputs.run-targeted-extract == 'true'
@@ -592,18 +663,22 @@ jobs:
             echo "has_label=true" >> $GITHUB_OUTPUT
           fi
 
+      - name: Setup pnpm
+        if: needs.determine-evals.outputs.run-agent == 'true'
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+
       - name: Set up Node.js
         if: needs.determine-evals.outputs.run-agent == 'true'
         uses: actions/setup-node@v4
         with:
           node-version: "20"
+          cache: "pnpm"
 
       - name: Install dependencies
         if: needs.determine-evals.outputs.run-agent == 'true'
-        run: |
-          rm -rf node_modules
-          npm i -g pnpm
-          pnpm install --no-frozen-lockfile
+        run: pnpm install --frozen-lockfile
 
       - name: Build Stagehand
         if: needs.determine-evals.outputs.run-agent == 'true'
diff --git a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
index e7ea4bf5a..1dddf8c92 100644
--- a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
+++ b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
@@ -16,7 +16,9 @@ export default defineConfig({
   testDir: ".",
   timeout: 90_000,
   expect: { timeout: 10_000 },
-  workers: 2,
+  // Conservative worker count for Browserbase tests (external service dependency)
+  // Can be increased if Browserbase API handles higher concurrency well
+  workers: process.env.CI ? 3 : 4,
   fullyParallel: true,
   reporter: "list",
   use: {
diff --git a/packages/core/lib/v3/tests/v3.local.playwright.config.ts b/packages/core/lib/v3/tests/v3.local.playwright.config.ts
index 67294f8a1..381bb258b 100644
--- a/packages/core/lib/v3/tests/v3.local.playwright.config.ts
+++ b/packages/core/lib/v3/tests/v3.local.playwright.config.ts
@@ -16,8 +16,9 @@ export default defineConfig({
   testDir: ".",
   timeout: 90_000,
   expect: { timeout: 10_000 },
-  // Keep single-worker until the suite is stable; parallel browsers can clash with a single V3 instance.
-  workers: 2,
+  // Increased from 2 to improve CI performance. Use environment variable to control.
+  // CI uses 4 workers, local development can use up to 8 for faster test runs.
+  workers: process.env.CI ? 4 : 6,
   fullyParallel: true,
   reporter: "list",
   use: {
diff --git a/packages/core/lib/v3/tests/v3.playwright.config.ts b/packages/core/lib/v3/tests/v3.playwright.config.ts
index 8db19a1d6..f9745c077 100644
--- a/packages/core/lib/v3/tests/v3.playwright.config.ts
+++ b/packages/core/lib/v3/tests/v3.playwright.config.ts
@@ -4,8 +4,9 @@ export default defineConfig({
   testDir: ".",
   timeout: 90_000,
   expect: { timeout: 10_000 },
-  // Keep single-worker until the suite is stable; parallel browsers can clash with a single V3 instance.
-  workers: 2,
+  // Worker count raised from 2 to shorten test runs; it is selected by the CI environment variable.
+  // CI uses 4 workers; local development uses 6.
+  workers: process.env.CI ? 4 : 6,
   fullyParallel: true,
   reporter: "list",
   use: {

From 5701a0635dd59e9f64bec28362df93953773998e Mon Sep 17 00:00:00 2001
From: Filip Michalsky <filip@paywithsoap.com>
Date: Fri, 7 Nov 2025 13:42:22 -0500
Subject: [PATCH 2/7] remove comment

---
 packages/core/lib/v3/tests/v3.bb.playwright.config.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
index 1dddf8c92..f08f0db12 100644
--- a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
+++ b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
@@ -16,8 +16,6 @@ export default defineConfig({
   testDir: ".",
   timeout: 90_000,
   expect: { timeout: 10_000 },
-  // Conservative worker count for Browserbase tests (external service dependency)
-  // Can be increased if Browserbase API handles higher concurrency well
   workers: process.env.CI ? 3 : 4,
   fullyParallel: true,
   reporter: "list",

From 31c1108c33eac127c1906b997757ba7c57737952 Mon Sep 17 00:00:00 2001
From: Filip Michalsky <filip@paywithsoap.com>
Date: Fri, 7 Nov 2025 13:47:18 -0500
Subject: [PATCH 3/7] remove pnpm version conflict

---
 .github/workflows/ci.yml | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bcb2e90ac..448abebea 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -122,8 +122,6 @@ jobs:
 
       - name: Setup pnpm
         uses: pnpm/action-setup@v4
-        with:
-          version: 9
 
       - name: Set up Node.js
         uses: actions/setup-node@v4
@@ -147,8 +145,6 @@ jobs:
 
       - name: Setup pnpm
         uses: pnpm/action-setup@v4
-        with:
-          version: 9
 
       - name: Set up Node.js
         uses: actions/setup-node@v4
@@ -174,8 +170,6 @@ jobs:
 
       - name: Setup pnpm
         uses: pnpm/action-setup@v4
-        with:
-          version: 9
 
       - name: Set up Node.js
         uses: actions/setup-node@v4
@@ -212,8 +206,6 @@ jobs:
 
       - name: Setup pnpm
         uses: pnpm/action-setup@v4
-        with:
-          version: 9
 
       - name: Set up Node.js
         uses: actions/setup-node@v4
@@ -253,8 +245,6 @@ jobs:
 
       - name: Setup pnpm
         uses: pnpm/action-setup@v4
-        with:
-          version: 9
 
       - name: Set up Node.js
         uses: actions/setup-node@v4

From 3e782497ffb5098fb8dcb5577c6248335644330f Mon Sep 17 00:00:00 2001
From: Filip Michalsky <filip@paywithsoap.com>
Date: Fri, 7 Nov 2025 14:25:40 -0500
Subject: [PATCH 4/7] scale back concurrency

---
 .github/workflows/ci.yml                          | 15 ++++++++++++++-
 .../lib/v3/tests/multi-instance-logger.spec.ts    |  5 +++++
 .../core/lib/v3/tests/v3.bb.playwright.config.ts  |  4 +++-
 .../lib/v3/tests/v3.local.playwright.config.ts    |  6 +++---
 4 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 448abebea..2477af42f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -55,6 +55,7 @@ jobs:
     runs-on: ubuntu-latest
     outputs:
       skip-all-evals: ${{ steps.check-labels.outputs.skip-all-evals }}
+      run-regression: ${{ steps.check-labels.outputs.run-regression }}
       run-combination: ${{ steps.check-labels.outputs.run-combination }}
       run-extract: ${{ steps.check-labels.outputs.run-extract }}
       run-act: ${{ steps.check-labels.outputs.run-act }}
@@ -68,6 +69,7 @@ jobs:
           if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-evals') }}" == "true" ]]; then
             echo "skip-evals label found - skipping all evals"
             echo "skip-all-evals=true" >> $GITHUB_OUTPUT
+            echo "run-regression=false" >> $GITHUB_OUTPUT
             echo "run-combination=false" >> $GITHUB_OUTPUT
             echo "run-extract=false" >> $GITHUB_OUTPUT
             echo "run-act=false" >> $GITHUB_OUTPUT
@@ -81,6 +83,7 @@ jobs:
           if [[ "${{ needs.determine-changes.outputs.docs-only }}" == "true" && "${{ needs.determine-changes.outputs.core }}" == "false" && "${{ needs.determine-changes.outputs.evals }}" == "false" && "${{ github.ref }}" != "refs/heads/main" ]]; then
             echo "Only docs/examples changed - skipping evals"
             echo "skip-all-evals=true" >> $GITHUB_OUTPUT
+            echo "run-regression=false" >> $GITHUB_OUTPUT
             echo "run-combination=false" >> $GITHUB_OUTPUT
             echo "run-extract=false" >> $GITHUB_OUTPUT
             echo "run-act=false" >> $GITHUB_OUTPUT
@@ -94,6 +97,7 @@ jobs:
           if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
             echo "Running all tests for main branch"
             echo "skip-all-evals=false" >> $GITHUB_OUTPUT
+            echo "run-regression=true" >> $GITHUB_OUTPUT
             echo "run-combination=true" >> $GITHUB_OUTPUT
             echo "run-extract=true" >> $GITHUB_OUTPUT
             echo "run-act=true" >> $GITHUB_OUTPUT
@@ -103,6 +107,15 @@ jobs:
             exit 0
           fi
 
+          # Check for skip-regression-evals label
+          if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-regression-evals') }}" == "true" ]]; then
+            echo "skip-regression-evals label found - regression evals will be skipped"
+            echo "run-regression=false" >> $GITHUB_OUTPUT
+          else
+            echo "Regression evals will run by default"
+            echo "run-regression=true" >> $GITHUB_OUTPUT
+          fi
+
           # Check for specific labels
           echo "skip-all-evals=false" >> $GITHUB_OUTPUT
           echo "run-combination=${{ contains(github.event.pull_request.labels.*.name, 'combination') }}" >> $GITHUB_OUTPUT
@@ -225,7 +238,7 @@ jobs:
   run-regression-evals:
     needs:
       [run-e2e-bb-tests, run-e2e-local-tests, determine-evals]
-    if: needs.determine-evals.outputs.skip-all-evals != 'true'
+    if: needs.determine-evals.outputs.skip-all-evals != 'true' && needs.determine-evals.outputs.run-regression == 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 9
     outputs:
diff --git a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts
index f29f1ca90..73f69eaa3 100644
--- a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts
+++ b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts
@@ -4,6 +4,11 @@ import { getV3DynamicTestConfig } from "./v3.dynamic.config";
 import type { LogLine } from "../types/public/logs";
 
 test.describe("V3 Multi-Instance Logger Isolation", () => {
+  // Run tests serially to avoid resource exhaustion from creating many Chrome instances
+  test.describe.configure({ mode: 'serial' });
+  // Increase timeout for stress tests that create/destroy multiple instances
+  test.setTimeout(120_000);
+
   test("multiple V3 instances can be created concurrently without logger conflicts", async () => {
     const instanceCount = 5;
     const instances: V3[] = [];
diff --git a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
index f08f0db12..df89516ce 100644
--- a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
+++ b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts
@@ -16,7 +16,9 @@ export default defineConfig({
   testDir: ".",
   timeout: 90_000,
   expect: { timeout: 10_000 },
-  workers: process.env.CI ? 3 : 4,
+  // Conservative parallelization for Browserbase: 2 workers in CI to avoid resource exhaustion.
+  // Browserbase tests are heavier due to remote browser connections.
+  workers: process.env.CI ? 2 : 3,
   fullyParallel: true,
   reporter: "list",
   use: {
diff --git a/packages/core/lib/v3/tests/v3.local.playwright.config.ts b/packages/core/lib/v3/tests/v3.local.playwright.config.ts
index 381bb258b..093744a3e 100644
--- a/packages/core/lib/v3/tests/v3.local.playwright.config.ts
+++ b/packages/core/lib/v3/tests/v3.local.playwright.config.ts
@@ -16,9 +16,9 @@ export default defineConfig({
   testDir: ".",
   timeout: 90_000,
   expect: { timeout: 10_000 },
-  // Increased from 2 to improve CI performance. Use environment variable to control.
-  // CI uses 4 workers, local development can use up to 8 for faster test runs.
-  workers: process.env.CI ? 4 : 6,
+  // Balanced parallelization: 3 workers in CI to avoid resource exhaustion while maintaining speed.
+  // Local development can use more workers for faster test runs.
+  workers: process.env.CI ? 3 : 5,
   fullyParallel: true,
   reporter: "list",
   use: {

From 4ecc64b5b4e9c9b25617f24ecc4c8cca4c14dea4 Mon Sep 17 00:00:00 2001
From: Filip Michalsky <filip@paywithsoap.com>
Date: Fri, 7 Nov 2025 14:31:19 -0500
Subject: [PATCH 5/7] fix lint

---
 CHANGELOG.md                                          | 11 +++++------
 .../core/lib/v3/tests/multi-instance-logger.spec.ts   |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 196d71f03..b37ce4d87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,17 +2,17 @@
 
 ## 3.0.0
 
-### Major Changes 
+### Major Changes
 
 - Removes internal Playwright dependency
 - A generous 20-40% speed increase across `act`, `extract`, & `observe` calls
 - Compatibility with Playwright, Puppeteer, and Patchright
 - Automatic action caching (agent, stagehand.act). Go from CUA → deterministic scripts w/o inference
 - A suite of non AI primitives:
-    - `page`
-    - `locator` (built in closed mode shadow root traversal, with xpaths & css selectors)
-    - `frameLocator`
-    - `deepLocator` (crosses iframes & shadow roots)
+  - `page`
+  - `locator` (built in closed mode shadow root traversal, with xpaths & css selectors)
+  - `frameLocator`
+  - `deepLocator` (crosses iframes & shadow roots)
 - bun compatibility
 - Simplified extract schemas
 - CSS selector support (id-based support coming soon)
@@ -21,7 +21,6 @@
 
 Check the [migration guide](https://docs.stagehand.dev/v3/migrations/v2) for more information
 
-
 ## 2.5.0
 
 ### Minor Changes
diff --git a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts
index 73f69eaa3..b76545e4e 100644
--- a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts
+++ b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts
@@ -5,7 +5,7 @@ import type { LogLine } from "../types/public/logs";
 
 test.describe("V3 Multi-Instance Logger Isolation", () => {
   // Run tests serially to avoid resource exhaustion from creating many Chrome instances
-  test.describe.configure({ mode: 'serial' });
+  test.describe.configure({ mode: "serial" });
   // Increase timeout for stress tests that create/destroy multiple instances
   test.setTimeout(120_000);
 

From 968eb94444388f124632b81f37de5914314b7c16 Mon Sep 17 00:00:00 2001
From: Filip Michalsky <filip@paywithsoap.com>
Date: Fri, 7 Nov 2025 15:08:57 -0500
Subject: [PATCH 6/7] ci: share build artifacts across eval jobs and run eval categories in parallel

---
 .github/workflows/ci.yml | 65 +++++++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2477af42f..a611b4483 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -171,6 +171,15 @@ jobs:
       - name: Run Build
         run: pnpm run build
 
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-artifacts
+          path: |
+            packages/*/dist/
+            packages/*/lib/
+          retention-days: 1
+
   run-e2e-local-tests:
     needs: [run-lint, run-build]
     runs-on: ubuntu-latest
@@ -237,7 +246,7 @@ jobs:
 
   run-regression-evals:
     needs:
-      [run-e2e-bb-tests, run-e2e-local-tests, determine-evals]
+      [run-e2e-bb-tests, run-e2e-local-tests, run-build, determine-evals]
     if: needs.determine-evals.outputs.skip-all-evals != 'true' && needs.determine-evals.outputs.run-regression == 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 9
@@ -268,8 +277,10 @@ jobs:
       - name: Install dependencies
         run: pnpm install --frozen-lockfile
 
-      - name: Build Stagehand
-        run: pnpm run build
+      - name: Download build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts
 
       - name: Run Regression Evals
         run: pnpm run evals category regression trials=2 concurrency=20 env=BROWSERBASE
@@ -291,7 +302,7 @@ jobs:
           fi
 
   run-combination-evals:
-    needs: [run-regression-evals, determine-evals]
+    needs: [run-regression-evals, run-build, determine-evals]
     runs-on: ubuntu-latest
     timeout-minutes: 40
     env:
@@ -334,9 +345,11 @@ jobs:
         if: needs.determine-evals.outputs.run-combination == 'true'
         run: pnpm install --frozen-lockfile
 
-      - name: Build Stagehand
+      - name: Download build artifacts
         if: needs.determine-evals.outputs.run-combination == 'true'
-        run: pnpm run build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts
 
       - name: Run Combination Evals
         if: needs.determine-evals.outputs.run-combination == 'true'
@@ -357,7 +370,7 @@ jobs:
           fi
 
   run-act-evals:
-    needs: [run-combination-evals, determine-evals]
+    needs: [run-regression-evals, run-build, determine-evals]
     runs-on: ubuntu-latest
     timeout-minutes: 25
     env:
@@ -400,9 +413,11 @@ jobs:
         if: needs.determine-evals.outputs.run-act == 'true'
         run: pnpm install --frozen-lockfile
 
-      - name: Build Stagehand
+      - name: Download build artifacts
         if: needs.determine-evals.outputs.run-act == 'true'
-        run: pnpm run build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts
 
       - name: Run Act Evals
         if: needs.determine-evals.outputs.run-act == 'true'
@@ -426,7 +441,7 @@ jobs:
           fi
 
   run-extract-evals:
-    needs: [run-act-evals, determine-evals]
+    needs: [run-regression-evals, run-build, determine-evals]
     runs-on: ubuntu-latest
     timeout-minutes: 50
     env:
@@ -469,9 +484,11 @@ jobs:
         if: needs.determine-evals.outputs.run-extract == 'true'
         run: pnpm install --frozen-lockfile
 
-      - name: Build Stagehand
+      - name: Download build artifacts
         if: needs.determine-evals.outputs.run-extract == 'true'
-        run: pnpm run build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts
 
       # 1. Run extract category with domExtract
       - name: Run Extract Evals (domExtract)
@@ -498,7 +515,7 @@ jobs:
           fi
 
   run-observe-evals:
-    needs: [run-extract-evals, determine-evals]
+    needs: [run-regression-evals, run-build, determine-evals]
     runs-on: ubuntu-latest
     timeout-minutes: 60
     env:
@@ -541,9 +558,11 @@ jobs:
         if: needs.determine-evals.outputs.run-observe == 'true'
         run: pnpm install --frozen-lockfile
 
-      - name: Build Stagehand
+      - name: Download build artifacts
         if: needs.determine-evals.outputs.run-observe == 'true'
-        run: pnpm run build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts
 
       - name: Run Observe Evals
         if: needs.determine-evals.outputs.run-observe == 'true'
@@ -567,7 +586,7 @@ jobs:
           fi
 
   run-targeted-extract-evals:
-    needs: [run-observe-evals, determine-evals]
+    needs: [run-regression-evals, run-build, determine-evals]
     runs-on: ubuntu-latest
     timeout-minutes: 60
     env:
@@ -610,9 +629,11 @@ jobs:
         if: needs.determine-evals.outputs.run-targeted-extract == 'true'
         run: pnpm install --frozen-lockfile
 
-      - name: Build Stagehand
+      - name: Download build artifacts
         if: needs.determine-evals.outputs.run-targeted-extract == 'true'
-        run: pnpm run build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts
 
       - name: Run targeted extract Evals
         if: needs.determine-evals.outputs.run-targeted-extract == 'true'
@@ -636,7 +657,7 @@ jobs:
           fi
 
   run-agent-evals:
-    needs: [run-targeted-extract-evals, determine-evals]
+    needs: [run-regression-evals, run-build, determine-evals]
     runs-on: ubuntu-latest
     timeout-minutes: 90 # Agent evals can be long-running
     env:
@@ -683,9 +704,11 @@ jobs:
         if: needs.determine-evals.outputs.run-agent == 'true'
         run: pnpm install --frozen-lockfile
 
-      - name: Build Stagehand
+      - name: Download build artifacts
         if: needs.determine-evals.outputs.run-agent == 'true'
-        run: pnpm run build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts
 
       - name: Run Agent Evals
         if: needs.determine-evals.outputs.run-agent == 'true'

From 8202603f8f1d04d87ea402b78f98bba75ccb273a Mon Sep 17 00:00:00 2001
From: Filip Michalsky <filip@paywithsoap.com>
Date: Tue, 11 Nov 2025 21:10:25 -0500
Subject: [PATCH 7/7] add changeset

---
 .changeset/proud-olives-burn.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/proud-olives-burn.md

diff --git a/.changeset/proud-olives-burn.md b/.changeset/proud-olives-burn.md
new file mode 100644
index 000000000..f7ba5145d
--- /dev/null
+++ b/.changeset/proud-olives-burn.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Speed up CI: cache pnpm installs, reuse build artifacts across eval jobs, and skip evals for docs-only changes