diff --git a/.changeset/proud-olives-burn.md b/.changeset/proud-olives-burn.md new file mode 100644 index 000000000..f7ba5145d --- /dev/null +++ b/.changeset/proud-olives-burn.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +make ci faster diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85e976aba..4b4b02979 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,6 +26,7 @@ jobs: outputs: core: ${{ steps.filter.outputs.core }} evals: ${{ steps.filter.outputs.evals }} + docs-only: ${{ steps.filter.outputs.docs-only }} steps: - name: Check out repository code uses: actions/checkout@v4 @@ -44,10 +45,17 @@ jobs: - 'packages/evals/**' - 'package.json' - 'pnpm-lock.yaml' + docs-only: + - '**/*.md' + - 'examples/**' + - '!packages/**/*.md' determine-evals: + needs: [determine-changes] runs-on: ubuntu-latest outputs: + skip-all-evals: ${{ steps.check-labels.outputs.skip-all-evals }} + run-regression: ${{ steps.check-labels.outputs.run-regression }} run-combination: ${{ steps.check-labels.outputs.run-combination }} run-extract: ${{ steps.check-labels.outputs.run-extract }} run-act: ${{ steps.check-labels.outputs.run-act }} @@ -57,9 +65,39 @@ jobs: steps: - id: check-labels run: | + # Check if skip-evals label is present + if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-evals') }}" == "true" ]]; then + echo "skip-evals label found - skipping all evals" + echo "skip-all-evals=true" >> $GITHUB_OUTPUT + echo "run-regression=false" >> $GITHUB_OUTPUT + echo "run-combination=false" >> $GITHUB_OUTPUT + echo "run-extract=false" >> $GITHUB_OUTPUT + echo "run-act=false" >> $GITHUB_OUTPUT + echo "run-observe=false" >> $GITHUB_OUTPUT + echo "run-targeted-extract=false" >> $GITHUB_OUTPUT + echo "run-agent=false" >> $GITHUB_OUTPUT + exit 0 + fi + + # Skip evals if only docs/examples changed (and not on main) + if [[ "${{ needs.determine-changes.outputs.docs-only }}" == "true" && "${{ 
needs.determine-changes.outputs.core }}" == "false" && "${{ needs.determine-changes.outputs.evals }}" == "false" && "${{ github.ref }}" != "refs/heads/main" ]]; then + echo "Only docs/examples changed - skipping evals" + echo "skip-all-evals=true" >> $GITHUB_OUTPUT + echo "run-regression=false" >> $GITHUB_OUTPUT + echo "run-combination=false" >> $GITHUB_OUTPUT + echo "run-extract=false" >> $GITHUB_OUTPUT + echo "run-act=false" >> $GITHUB_OUTPUT + echo "run-observe=false" >> $GITHUB_OUTPUT + echo "run-targeted-extract=false" >> $GITHUB_OUTPUT + echo "run-agent=false" >> $GITHUB_OUTPUT + exit 0 + fi + # Default to running all tests on main branch if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then echo "Running all tests for main branch" + echo "skip-all-evals=false" >> $GITHUB_OUTPUT + echo "run-regression=true" >> $GITHUB_OUTPUT echo "run-combination=true" >> $GITHUB_OUTPUT echo "run-extract=true" >> $GITHUB_OUTPUT echo "run-act=true" >> $GITHUB_OUTPUT @@ -69,7 +107,17 @@ jobs: exit 0 fi + # Check for skip-regression-evals label + if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-regression-evals') }}" == "true" ]]; then + echo "skip-regression-evals label found - regression evals will be skipped" + echo "run-regression=false" >> $GITHUB_OUTPUT + else + echo "Regression evals will run by default" + echo "run-regression=true" >> $GITHUB_OUTPUT + fi + # Check for specific labels + echo "skip-all-evals=false" >> $GITHUB_OUTPUT echo "run-combination=${{ contains(github.event.pull_request.labels.*.name, 'combination') }}" >> $GITHUB_OUTPUT echo "run-extract=${{ contains(github.event.pull_request.labels.*.name, 'extract') }}" >> $GITHUB_OUTPUT echo "run-act=${{ contains(github.event.pull_request.labels.*.name, 'act') }}" >> $GITHUB_OUTPUT @@ -85,16 +133,17 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" 
+ cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Run Lint run: pnpm run lint @@ -107,20 +156,30 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Run Build run: pnpm run build + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: build-artifacts + path: | + packages/*/dist/ + packages/*/lib/ + retention-days: 1 + - name: Run Vitest run: pnpm --filter @browserbasehq/stagehand run test:vitest @@ -134,16 +193,17 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand run: pnpm run build @@ -169,16 +229,17 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build Stagehand run: pnpm run build @@ -188,7 +249,8 @@ jobs: run-regression-evals: needs: - [run-e2e-bb-tests, run-e2e-local-tests, determine-evals] + [run-e2e-bb-tests, run-e2e-local-tests, run-build, determine-evals] + if: needs.determine-evals.outputs.skip-all-evals != 'true' && 
needs.determine-evals.outputs.run-regression == 'true' runs-on: ubuntu-latest timeout-minutes: 9 outputs: @@ -206,19 +268,22 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Setup pnpm + uses: pnpm/action-setup@v4 + - name: Set up Node.js uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - - name: Build Stagehand - run: pnpm run build + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Regression Evals run: pnpm run evals category regression trials=2 concurrency=20 env=BROWSERBASE @@ -240,7 +305,7 @@ jobs: fi run-combination-evals: - needs: [run-regression-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 40 env: @@ -266,22 +331,28 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-combination == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-combination == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-combination == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-combination == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Combination Evals if: needs.determine-evals.outputs.run-combination == 'true' @@ -302,7 +373,7 @@ jobs: fi run-act-evals: - needs: [run-combination-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: 
ubuntu-latest timeout-minutes: 25 env: @@ -328,22 +399,28 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-act == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-act == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-act == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-act == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Act Evals if: needs.determine-evals.outputs.run-act == 'true' @@ -367,7 +444,7 @@ jobs: fi run-extract-evals: - needs: [run-act-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 50 env: @@ -393,22 +470,28 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-extract == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-extract == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-extract == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-extract == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts # 1. 
Run extract category with domExtract - name: Run Extract Evals (domExtract) @@ -435,7 +518,7 @@ jobs: fi run-observe-evals: - needs: [run-extract-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 60 env: @@ -461,22 +544,28 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-observe == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-observe == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-observe == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-observe == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Observe Evals if: needs.determine-evals.outputs.run-observe == 'true' @@ -500,7 +589,7 @@ jobs: fi run-targeted-extract-evals: - needs: [run-observe-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 60 env: @@ -526,22 +615,28 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-targeted-extract == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-targeted-extract == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-targeted-extract == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: 
needs.determine-evals.outputs.run-targeted-extract == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run targeted extract Evals if: needs.determine-evals.outputs.run-targeted-extract == 'true' @@ -565,7 +660,7 @@ jobs: fi run-agent-evals: - needs: [run-targeted-extract-evals, determine-evals] + needs: [run-regression-evals, run-build, determine-evals] runs-on: ubuntu-latest timeout-minutes: 90 # Agent evals can be long-running env: @@ -595,22 +690,28 @@ jobs: echo "has_label=true" >> $GITHUB_OUTPUT fi + - name: Setup pnpm + if: needs.determine-evals.outputs.run-agent == 'true' + uses: pnpm/action-setup@v4 + with: + version: 9 + - name: Set up Node.js if: needs.determine-evals.outputs.run-agent == 'true' uses: actions/setup-node@v4 with: node-version: "20" + cache: "pnpm" - name: Install dependencies if: needs.determine-evals.outputs.run-agent == 'true' - run: | - rm -rf node_modules - npm i -g pnpm - pnpm install --no-frozen-lockfile + run: pnpm install --frozen-lockfile - - name: Build Stagehand + - name: Download build artifacts if: needs.determine-evals.outputs.run-agent == 'true' - run: pnpm run build + uses: actions/download-artifact@v4 + with: + name: build-artifacts - name: Run Agent Evals if: needs.determine-evals.outputs.run-agent == 'true' diff --git a/CHANGELOG.md b/CHANGELOG.md index 196d71f03..b37ce4d87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,17 +2,17 @@ ## 3.0.0 -### Major Changes +### Major Changes - Removes internal Playwright dependency - A generous 20-40% speed increase across `act`, `extract`, & `observe` calls - Compatibility with Playwright, Puppeteer, and Patchright - Automatic action caching (agent, stagehand.act). 
Go from CUA → deterministic scripts w/o inference - A suite of non AI primitives: - - `page` - - `locator` (built in closed mode shadow root traversal, with xpaths & css selectors) - - `frameLocator` - - `deepLocator` (crosses iframes & shadow roots) + - `page` + - `locator` (built in closed mode shadow root traversal, with xpaths & css selectors) + - `frameLocator` + - `deepLocator` (crosses iframes & shadow roots) - bun compatibility - Simplified extract schemas - CSS selector support (id-based support coming soon) @@ -21,7 +21,6 @@ Check the [migration guide](https://docs.stagehand.dev/v3/migrations/v2) for more information - ## 2.5.0 ### Minor Changes diff --git a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts index f29f1ca90..b76545e4e 100644 --- a/packages/core/lib/v3/tests/multi-instance-logger.spec.ts +++ b/packages/core/lib/v3/tests/multi-instance-logger.spec.ts @@ -4,6 +4,11 @@ import { getV3DynamicTestConfig } from "./v3.dynamic.config"; import type { LogLine } from "../types/public/logs"; test.describe("V3 Multi-Instance Logger Isolation", () => { + // Run tests serially to avoid resource exhaustion from creating many Chrome instances + test.describe.configure({ mode: "serial" }); + // Increase timeout for stress tests that create/destroy multiple instances + test.setTimeout(120_000); + test("multiple V3 instances can be created concurrently without logger conflicts", async () => { const instanceCount = 5; const instances: V3[] = []; diff --git a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts index e7ea4bf5a..df89516ce 100644 --- a/packages/core/lib/v3/tests/v3.bb.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.bb.playwright.config.ts @@ -16,7 +16,9 @@ export default defineConfig({ testDir: ".", timeout: 90_000, expect: { timeout: 10_000 }, - workers: 2, + // Conservative parallelization for Browserbase: 2 workers 
in CI to avoid resource exhaustion. + // Browserbase tests are heavier due to remote browser connections. + workers: process.env.CI ? 2 : 3, fullyParallel: true, reporter: "list", use: { diff --git a/packages/core/lib/v3/tests/v3.local.playwright.config.ts b/packages/core/lib/v3/tests/v3.local.playwright.config.ts index 67294f8a1..093744a3e 100644 --- a/packages/core/lib/v3/tests/v3.local.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.local.playwright.config.ts @@ -16,8 +16,9 @@ export default defineConfig({ testDir: ".", timeout: 90_000, expect: { timeout: 10_000 }, - // Keep single-worker until the suite is stable; parallel browsers can clash with a single V3 instance. - workers: 2, + // Balanced parallelization: 3 workers in CI to avoid resource exhaustion while maintaining speed. + // Local development can use more workers for faster test runs. + workers: process.env.CI ? 3 : 5, fullyParallel: true, reporter: "list", use: { diff --git a/packages/core/lib/v3/tests/v3.playwright.config.ts b/packages/core/lib/v3/tests/v3.playwright.config.ts index 8db19a1d6..f9745c077 100644 --- a/packages/core/lib/v3/tests/v3.playwright.config.ts +++ b/packages/core/lib/v3/tests/v3.playwright.config.ts @@ -4,8 +4,9 @@ export default defineConfig({ testDir: ".", timeout: 90_000, expect: { timeout: 10_000 }, - // Keep single-worker until the suite is stable; parallel browsers can clash with a single V3 instance. - workers: 2, + // Increased from 2 to improve CI performance. The CI environment variable selects the worker count. + // CI uses 4 workers; local development uses 6 for faster test runs. + workers: process.env.CI ? 4 : 6, fullyParallel: true, reporter: "list", use: {