From 083bc81874b0f26b7cbcbba05f677fca2ce2755c Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Fri, 7 Nov 2025 13:34:31 -0500 Subject: [PATCH 1/7] work on making ci faster --- .github/workflows/ci.yml | 163 +++++++++++++----- .../lib/v3/tests/v3.bb.playwright.config.ts | 4 +- .../v3/tests/v3.local.playwright.config.ts | 5 +- .../core/lib/v3/tests/v3.playwright.config.ts | 5 +- 4 files changed, 128 insertions(+), 49 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d8bbe6657..bcb2e90ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,6 +26,7 @@ jobs: outputs: core: ${{ steps.filter.outputs.core }} evals: ${{ steps.filter.outputs.evals }} + docs-only: ${{ steps.filter.outputs.docs-only }} steps: - name: Check out repository code uses: actions/checkout@v4 @@ -44,10 +45,16 @@ jobs: - 'packages/evals/**' - 'package.json' - 'pnpm-lock.yaml' + docs-only: + - '**/*.md' + - 'examples/**' + - '!packages/**/*.md' determine-evals: + needs: [determine-changes] runs-on: ubuntu-latest outputs: + skip-all-evals: ${{ steps.check-labels.outputs.skip-all-evals }} run-combination: ${{ steps.check-labels.outputs.run-combination }} run-extract: ${{ steps.check-labels.outputs.run-extract }} run-act: ${{ steps.check-labels.outputs.run-act }} @@ -57,9 +64,36 @@ jobs: steps: - id: check-labels run: | + # Check if skip-evals label is present + if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-evals') }}" == "true" ]]; then + echo "skip-evals label found - skipping all evals" + echo "skip-all-evals=true" >> $GITHUB_OUTPUT + echo "run-combination=false" >> $GITHUB_OUTPUT + echo "run-extract=false" >> $GITHUB_OUTPUT + echo "run-act=false" >> $GITHUB_OUTPUT + echo "run-observe=false" >> $GITHUB_OUTPUT + echo "run-targeted-extract=false" >> $GITHUB_OUTPUT + echo "run-agent=false" >> $GITHUB_OUTPUT + exit 0 + fi + + # Skip evals if only docs/examples changed (and not on main) + if [[ "${{ 
needs.determine-changes.outputs.docs-only }}" == "true" && "${{ needs.determine-changes.outputs.core }}" == "false" && "${{ needs.determine-changes.outputs.evals }}" == "false" && "${{ github.ref }}" != "refs/heads/main" ]]; then + echo "Only docs/examples changed - skipping evals" + echo "skip-all-evals=true" >> $GITHUB_OUTPUT + echo "run-combination=false" >> $GITHUB_OUTPUT + echo "run-extract=false" >> $GITHUB_OUTPUT + echo "run-act=false" >> $GITHUB_OUTPUT + echo "run-observe=false" >> $GITHUB_OUTPUT + echo "run-targeted-extract=false" >> $GITHUB_OUTPUT + echo "run-agent=false" >> $GITHUB_OUTPUT + exit 0 + fi + # Default to running all tests on main branch if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then echo "Running all tests for main branch" + echo "skip-all-evals=false" >> $GITHUB_OUTPUT echo "run-combination=true" >> $GITHUB_OUTPUT echo "run-extract=true" >> $GITHUB_OUTPUT echo "run-act=true" >> $GITHUB_OUTPUT @@ -70,6 +104,7 @@ jobs: fi # Check for specific labels + echo "skip-all-evals=false" >> $GITHUB_OUTPUT echo "run-combination=${{ contains(github.event.pull_request.labels.*.name, 'combination') }}" >> $GITHUB_OUTPUT echo "run-extract=${{ contains(github.event.pull_request.labels.*.name, 'extract') }}" >> $GITHUB_OUTPUT echo "run-act=${{ contains(github.event.pull_request.labels.*.name, 'act') }}" >> $GITHUB_OUTPUT @@ -85,16 +120,19 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Run Lint run: pnpm run lint @@ -107,16 +145,19 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js uses: 
actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Run Build run: pnpm run build @@ -131,16 +172,19 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand run: pnpm run build @@ -166,16 +210,19 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand run: pnpm run build @@ -186,6 +233,7 @@ jobs: run-regression-evals: needs: [run-e2e-bb-tests, run-e2e-local-tests, determine-evals] + if: needs.determine-evals.outputs.skip-all-evals != 'true' runs-on: ubuntu-latest timeout-minutes: 9 outputs: @@ -203,16 +251,19 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand run: pnpm run build @@ -263,18 +314,22 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-combination == 'true' + uses: pnpm/action-setup@v4 
+ with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-combination == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-combination == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand if: needs.determine-evals.outputs.run-combination == 'true' @@ -325,18 +380,22 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-act == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-act == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-act == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand if: needs.determine-evals.outputs.run-act == 'true' @@ -390,18 +449,22 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-extract == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-extract == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-extract == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand if: needs.determine-evals.outputs.run-extract == 'true' @@ -458,18 +521,22 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-observe == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-observe == 
'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-observe == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand if: needs.determine-evals.outputs.run-observe == 'true' @@ -523,18 +590,22 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-targeted-extract == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-targeted-extract == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-targeted-extract == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand if: needs.determine-evals.outputs.run-targeted-extract == 'true' @@ -592,18 +663,22 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-agent == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-agent == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-agent == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand if: needs.determine-evals.outputs.run-agent == 'true' diff --git a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts index e7ea4bf5a..1dddf8c92 100644 --- a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts @@ -16,7 +16,9 @@ export default defineConfig({ testDir: 
".", timeout: 90_000, expect: { timeout: 10_000 }, - workers: 2, + // Conservative worker count for Browserbase tests (external service dependency) + // Can be increased if Browserbase API handles higher concurrency well + workers: process.env.CI ? 3 : 4, fullyParallel: true, reporter: "list", use: { diff --git a/packages/core/lib/v3/tests/v3.local.playwright.config.ts b/packages/core/lib/v3/tests/v3.local.playwright.config.ts index 67294f8a1..381bb258b 100644 --- a/packages/core/lib/v3/tests/v3.local.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.local.playwright.config.ts @@ -16,8 +16,9 @@ export default defineConfig({ testDir: ".", timeout: 90_000, expect: { timeout: 10_000 }, - // Keep single-worker until the suite is stable; parallel browsers can clash with a single V3 instance. - workers: 2, + // Increased from 2 to improve CI performance. Use environment variable to control. + // CI uses 4 workers, local development can use up to 8 for faster test runs. + workers: process.env.CI ? 4 : 6, fullyParallel: true, reporter: "list", use: { diff --git a/packages/core/lib/v3/tests/v3.playwright.config.ts b/packages/core/lib/v3/tests/v3.playwright.config.ts index 8db19a1d6..f9745c077 100644 --- a/packages/core/lib/v3/tests/v3.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.playwright.config.ts @@ -4,8 +4,9 @@ export default defineConfig({ testDir: ".", timeout: 90_000, expect: { timeout: 10_000 }, - // Keep single-worker until the suite is stable; parallel browsers can clash with a single V3 instance. - workers: 2, + // Increased from 2 to improve CI performance; the count is selected by the CI environment variable. + // CI uses 4 workers; local development uses 6 for faster test runs. + workers: process.env.CI ?
4 : 6, fullyParallel: true, reporter: "list", use: { From 5701a0635dd59e9f64bec28362df93953773998e Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Fri, 7 Nov 2025 13:42:22 -0500 Subject: [PATCH 2/7] remove comment --- packages/core/lib/v3/tests/v3.bb.playwright.config.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts index 1dddf8c92..f08f0db12 100644 --- a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts @@ -16,8 +16,6 @@ export default defineConfig({ testDir: ".", timeout: 90_000, expect: { timeout: 10_000 }, - // Conservative worker count for Browserbase tests (external service dependency) - // Can be increased if Browserbase API handles higher concurrency well workers: process.env.CI ? 3 : 4, fullyParallel: true, reporter: "list", From 31c1108c33eac127c1906b997757ba7c57737952 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Fri, 7 Nov 2025 13:47:18 -0500 Subject: [PATCH 3/7] remove pnpm version conflict --- .github/workflows/ci.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bcb2e90ac..448abebea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -122,8 +122,6 @@ jobs: - name: Setup pnpm uses: pnpm/action-setup@v4 - with: - version: 9 - name: Set up Node.js uses: actions/setup-node@v4 @@ -147,8 +145,6 @@ jobs: - name: Setup pnpm uses: pnpm/action-setup@v4 - with: - version: 9 - name: Set up Node.js uses: actions/setup-node@v4 @@ -174,8 +170,6 @@ jobs: - name: Setup pnpm uses: pnpm/action-setup@v4 - with: - version: 9 - name: Set up Node.js uses: actions/setup-node@v4 @@ -212,8 +206,6 @@ jobs: - name: Setup pnpm uses: pnpm/action-setup@v4 - with: - version: 9 - name: Set up Node.js uses: actions/setup-node@v4 @@ -253,8 +245,6 @@ jobs: - name: Setup pnpm uses: pnpm/action-setup@v4 - 
with: - version: 9 - name: Set up Node.js uses: actions/setup-node@v4 From 3e782497ffb5098fb8dcb5577c6248335644330f Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Fri, 7 Nov 2025 14:25:40 -0500 Subject: [PATCH 4/7] scale back concurrency --- .github/workflows/ci.yml | 15 ++++++++++++++- .../lib/v3/tests/multi-instance-logger.spec.ts | 5 +++++ .../core/lib/v3/tests/v3.bb.playwright.config.ts | 4 +++- .../lib/v3/tests/v3.local.playwright.config.ts | 6 +++--- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 448abebea..2477af42f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,6 +55,7 @@ jobs: runs-on: ubuntu-latest outputs: skip-all-evals: ${{ steps.check-labels.outputs.skip-all-evals }} + run-regression: ${{ steps.check-labels.outputs.run-regression }} run-combination: ${{ steps.check-labels.outputs.run-combination }} run-extract: ${{ steps.check-labels.outputs.run-extract }} run-act: ${{ steps.check-labels.outputs.run-act }} @@ -68,6 +69,7 @@ jobs: if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-evals') }}" == "true" ]]; then echo "skip-evals label found - skipping all evals" echo "skip-all-evals=true" >> $GITHUB_OUTPUT + echo "run-regression=false" >> $GITHUB_OUTPUT echo "run-combination=false" >> $GITHUB_OUTPUT echo "run-extract=false" >> $GITHUB_OUTPUT echo "run-act=false" >> $GITHUB_OUTPUT @@ -81,6 +83,7 @@ jobs: if [[ "${{ needs.determine-changes.outputs.docs-only }}" == "true" && "${{ needs.determine-changes.outputs.core }}" == "false" && "${{ needs.determine-changes.outputs.evals }}" == "false" && "${{ github.ref }}" != "refs/heads/main" ]]; then echo "Only docs/examples changed - skipping evals" echo "skip-all-evals=true" >> $GITHUB_OUTPUT + echo "run-regression=false" >> $GITHUB_OUTPUT echo "run-combination=false" >> $GITHUB_OUTPUT echo "run-extract=false" >> $GITHUB_OUTPUT echo "run-act=false" >> $GITHUB_OUTPUT @@ -94,6 +97,7 @@ jobs: 
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then echo "Running all tests for main branch" echo "skip-all-evals=false" >> $GITHUB_OUTPUT + echo "run-regression=true" >> $GITHUB_OUTPUT echo "run-combination=true" >> $GITHUB_OUTPUT echo "run-extract=true" >> $GITHUB_OUTPUT echo "run-act=true" >> $GITHUB_OUTPUT @@ -103,6 +107,15 @@ jobs: exit 0 fi + # Check for skip-regression-evals label + if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-regression-evals') }}" == "true" ]]; then + echo "skip-regression-evals label found - regression evals will be skipped" + echo "run-regression=false" >> $GITHUB_OUTPUT + else + echo "Regression evals will run by default" + echo "run-regression=true" >> $GITHUB_OUTPUT + fi + # Check for specific labels echo "skip-all-evals=false" >> $GITHUB_OUTPUT echo "run-combination=${{ contains(github.event.pull_request.labels.*.name, 'combination') }}" >> $GITHUB_OUTPUT @@ -225,7 +238,7 @@ jobs: run-regression-evals: needs: [run-e2e-bb-tests, run-e2e-local-tests, determine-evals] - if: needs.determine-evals.outputs.skip-all-evals != 'true' + if: needs.determine-evals.outputs.skip-all-evals != 'true' && needs.determine-evals.outputs.run-regression == 'true' runs-on: ubuntu-latest timeout-minutes: 9 outputs: diff --git a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts index f29f1ca90..73f69eaa3 100644 --- a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts +++ b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts @@ -4,6 +4,11 @@ import { getV3DynamicTestConfig } from "./v3.dynamic.config"; import type { LogLine } from "../types/public/logs"; test.describe("V3 Multi-Instance Logger Isolation", () => { + // Run tests serially to avoid resource exhaustion from creating many Chrome instances + test.describe.configure({ mode: 'serial' }); + // Increase timeout for stress tests that create/destroy multiple instances + test.setTimeout(120_000); + 
test("multiple V3 instances can be created concurrently without logger conflicts", async () => { const instanceCount = 5; const instances: V3[] = []; diff --git a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts index f08f0db12..df89516ce 100644 --- a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts @@ -16,7 +16,9 @@ export default defineConfig({ testDir: ".", timeout: 90_000, expect: { timeout: 10_000 }, - workers: process.env.CI ? 3 : 4, + // Conservative parallelization for Browserbase: 2 workers in CI to avoid resource exhaustion. + // Browserbase tests are heavier due to remote browser connections. + workers: process.env.CI ? 2 : 3, fullyParallel: true, reporter: "list", use: { diff --git a/packages/core/lib/v3/tests/v3.local.playwright.config.ts b/packages/core/lib/v3/tests/v3.local.playwright.config.ts index 381bb258b..093744a3e 100644 --- a/packages/core/lib/v3/tests/v3.local.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.local.playwright.config.ts @@ -16,9 +16,9 @@ export default defineConfig({ testDir: ".", timeout: 90_000, expect: { timeout: 10_000 }, - // Increased from 2 to improve CI performance. Use environment variable to control. - // CI uses 4 workers, local development can use up to 8 for faster test runs. - workers: process.env.CI ? 4 : 6, + // Balanced parallelization: 3 workers in CI to avoid resource exhaustion while maintaining speed. + // Local development can use more workers for faster test runs. + workers: process.env.CI ? 
3 : 5, fullyParallel: true, reporter: "list", use: { From 4ecc64b5b4e9c9b25617f24ecc4c8cca4c14dea4 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Fri, 7 Nov 2025 14:31:19 -0500 Subject: [PATCH 5/7] fix lint --- CHANGELOG.md | 11 +++++------ .../core/lib/v3/tests/multi-instance-logger.spec.ts | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 196d71f03..b37ce4d87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,17 +2,17 @@ ## 3.0.0 -### Major Changes +### Major Changes - Removes internal Playwright dependency - A generous 20-40% speed increase across `act`, `extract`, & `observe` calls - Compatibility with Playwright, Puppeteer, and Patchright - Automatic action caching (agent, stagehand.act). Go from CUA → deterministic scripts w/o inference - A suite of non AI primitives: - - `page` - - `locator` (built in closed mode shadow root traversal, with xpaths & css selectors) - - `frameLocator` - - `deepLocator` (crosses iframes & shadow roots) + - `page` + - `locator` (built in closed mode shadow root traversal, with xpaths & css selectors) + - `frameLocator` + - `deepLocator` (crosses iframes & shadow roots) - bun compatibility - Simplified extract schemas - CSS selector support (id-based support coming soon) @@ -21,7 +21,6 @@ Check the [migration guide](https://docs.stagehand.dev/v3/migrations/v2) for more information - ## 2.5.0 ### Minor Changes diff --git a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts index 73f69eaa3..b76545e4e 100644 --- a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts +++ b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts @@ -5,7 +5,7 @@ import type { LogLine } from "../types/public/logs"; test.describe("V3 Multi-Instance Logger Isolation", () => { // Run tests serially to avoid resource exhaustion from creating many Chrome instances - test.describe.configure({ mode: 'serial' }); + 
test.describe.configure({ mode: "serial" }); // Increase timeout for stress tests that create/destroy multiple instances test.setTimeout(120_000); From 968eb94444388f124632b81f37de5914314b7c16 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Fri, 7 Nov 2025 15:08:57 -0500 Subject: [PATCH 6/7] more ci improvements --- .github/workflows/ci.yml | 65 +++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2477af42f..a611b4483 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -171,6 +171,15 @@ jobs: - name: Run Build run: pnpm run build + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: build-artifacts + path: | + packages/*/dist/ + packages/*/lib/ + retention-days: 1 + run-e2e-local-tests: needs: [run-lint, run-build] runs-on: ubuntu-latest @@ -237,7 +246,7 @@ jobs: run-regression-evals: needs: - [run-e2e-bb-tests, run-e2e-local-tests, determine-evals] + [run-e2e-bb-tests, run-e2e-local-tests, run-build, determine-evals] if: needs.determine-evals.outputs.skip-all-evals != 'true' && needs.determine-evals.outputs.run-regression == 'true' runs-on: ubuntu-latest timeout-minutes: 9 @@ -268,8 +277,10 @@ jobs: - name: Install dependencies run: pnpm install --frozen-lockfile - - name: Build Stagehand - run: pnpm run build + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Regression Evals run: pnpm run evals category regression trials=2 concurrency=20 env=BROWSERBASE @@ -291,7 +302,7 @@ jobs: fi run-combination-evals: - needs: [run-regression-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 40 env: @@ -334,9 +345,11 @@ jobs: if: needs.determine-evals.outputs.run-combination == 'true' run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build 
artifacts if: needs.determine-evals.outputs.run-combination == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Combination Evals if: needs.determine-evals.outputs.run-combination == 'true' @@ -357,7 +370,7 @@ jobs: fi run-act-evals: - needs: [run-combination-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 25 env: @@ -400,9 +413,11 @@ jobs: if: needs.determine-evals.outputs.run-act == 'true' run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-act == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Act Evals if: needs.determine-evals.outputs.run-act == 'true' @@ -426,7 +441,7 @@ jobs: fi run-extract-evals: - needs: [run-act-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 50 env: @@ -469,9 +484,11 @@ jobs: if: needs.determine-evals.outputs.run-extract == 'true' run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-extract == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts # 1. 
Run extract category with domExtract - name: Run Extract Evals (domExtract) @@ -498,7 +515,7 @@ jobs: fi run-observe-evals: - needs: [run-extract-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 60 env: @@ -541,9 +558,11 @@ jobs: if: needs.determine-evals.outputs.run-observe == 'true' run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-observe == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Observe Evals if: needs.determine-evals.outputs.run-observe == 'true' @@ -567,7 +586,7 @@ jobs: fi run-targeted-extract-evals: - needs: [run-observe-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 60 env: @@ -610,9 +629,11 @@ jobs: if: needs.determine-evals.outputs.run-targeted-extract == 'true' run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-targeted-extract == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run targeted extract Evals if: needs.determine-evals.outputs.run-targeted-extract == 'true' @@ -636,7 +657,7 @@ jobs: fi run-agent-evals: - needs: [run-targeted-extract-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 90 # Agent evals can be long-running env: @@ -683,9 +704,11 @@ jobs: if: needs.determine-evals.outputs.run-agent == 'true' run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-agent == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Agent Evals if: needs.determine-evals.outputs.run-agent == 'true' From 
8202603f8f1d04d87ea402b78f98bba75ccb273a Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Tue, 11 Nov 2025 21:10:25 -0500 Subject: [PATCH 7/7] add changeset --- .changeset/proud-olives-burn.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/proud-olives-burn.md diff --git a/.changeset/proud-olives-burn.md b/.changeset/proud-olives-burn.md new file mode 100644 index 000000000..f7ba5145d --- /dev/null +++ b/.changeset/proud-olives-burn.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Speed up CI: cache pnpm installs, reuse build artifacts across eval jobs, and run eval categories in parallel