diff --git a/.changeset/icy-toes-obey.md b/.changeset/icy-toes-obey.md new file mode 100644 index 000000000..76ba1e561 --- /dev/null +++ b/.changeset/icy-toes-obey.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Add playwright arguments to agent execute response diff --git a/.changeset/loud-waves-think.md b/.changeset/loud-waves-think.md new file mode 100644 index 000000000..32d507247 --- /dev/null +++ b/.changeset/loud-waves-think.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +adds support for stagehand agent in the api diff --git a/.changeset/many-rats-punch.md b/.changeset/many-rats-punch.md new file mode 100644 index 000000000..2453a6967 --- /dev/null +++ b/.changeset/many-rats-punch.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Fix for zod peer dependency support diff --git a/docs/configuration/browser.mdx b/docs/configuration/browser.mdx index 501a80f40..97683ee0a 100644 --- a/docs/configuration/browser.mdx +++ b/docs/configuration/browser.mdx @@ -114,7 +114,7 @@ stagehand = Stagehand( apiKey: process.env.BROWSERBASE_API_KEY, projectId: process.env.BROWSERBASE_PROJECT_ID, browserbaseSessionCreateParams: { - projectId: process.env.BROWSERBASE_PROJECT_ID!, + projectId: process.env.BROWSERBASE_PROJECT_ID!, // Optional: automatically set if given in environment variable or by Stagehand parameter proxies: true, region: "us-west-2", timeout: 3600, // 1 hour session timeout @@ -124,17 +124,11 @@ stagehand = Stagehand( blockAds: true, solveCaptchas: true, recordSession: false, + os: "windows", // Valid: "windows" | "mac" | "linux" | "mobile" | "tablet" viewport: { width: 1920, height: 1080, }, - fingerprint: { - browsers: ["chrome", "edge"], - devices: ["desktop"], - operatingSystems: ["windows", "macos"], - locales: ["en-US", "en-GB"], - httpVersion: 2, - }, }, userMetadata: { userId: "automation-user-123", @@ -149,7 +143,7 @@ stagehand = Stagehand( api_key=os.getenv("BROWSERBASE_API_KEY"), 
project_id=os.getenv("BROWSERBASE_PROJECT_ID"), browserbase_session_create_params={ - "project_id": os.getenv("BROWSERBASE_PROJECT_ID"), + "project_id": os.getenv("BROWSERBASE_PROJECT_ID"), # Optional: automatically set if given in environment or by Stagehand parameter "proxies": True, "region": "us-west-2", "timeout": 3600, # 1 hour session timeout @@ -159,17 +153,11 @@ stagehand = Stagehand( "block_ads": True, "solve_captchas": True, "record_session": False, + "os": "windows", # "windows" | "mac" | "linux" | "mobile" | "tablet" "viewport": { "width": 1920, "height": 1080, }, - "fingerprint": { - "browsers": ["chrome", "edge"], - "devices": ["desktop"], - "operating_systems": ["windows", "macos"], - "locales": ["en-US", "en-GB"], - "http_version": 2, - }, }, "user_metadata": { "user_id": "automation-user-123", diff --git a/docs/configuration/models.mdx b/docs/configuration/models.mdx index d4cd30b6e..07472a8b6 100644 --- a/docs/configuration/models.mdx +++ b/docs/configuration/models.mdx @@ -168,7 +168,7 @@ Vercel AI SDK supports providers for OpenAI, Anthropic, and Google, along with s To get started, you'll need to install the `ai` package and the provider you want to use. For example, to use Amazon Bedrock, you'll need to install the `@ai-sdk/amazon-bedrock` package. -You'll also need to use the [Vercel AI SDK external client](https://github.com/browserbase/stagehand/blob/main/examples/external_clients/aisdk.ts) as a template to create a client for your model. +You'll also need to import the [Vercel AI SDK external client](https://github.com/browserbase/stagehand/blob/main/lib/llm/aisdk.ts), which is exposed as `AISdkClient`, to create a client for your model. 
@@ -190,13 +190,13 @@ You'll also need to use the [Vercel AI SDK external client](https://github.com/b -To get started, you can use the [Vercel AI SDK external client](https://github.com/browserbase/stagehand/blob/84f810b4631291307a32a47addad7e26e9c1deb3/examples/external_clients/aisdk.ts) as a template to create a client for your model. +To get started, you can use the [Vercel AI SDK external client](https://github.com/browserbase/stagehand/blob/main/lib/llm/aisdk.ts), which is exposed as `AISdkClient`, to create a client for your model. ```ts // Install/import the provider you want to use. // For example, to use OpenAI, import `openai` from @ai-sdk/openai import { bedrock } from "@ai-sdk/amazon-bedrock"; -import { AISdkClient } from "./external_clients/aisdk"; +import { AISdkClient } from "@browserbasehq/stagehand"; const stagehand = new Stagehand({ llmClient: new AISdkClient({ diff --git a/docs/configuration/observability.mdx b/docs/configuration/observability.mdx index 8482c9e8e..9527ee316 100644 --- a/docs/configuration/observability.mdx +++ b/docs/configuration/observability.mdx @@ -529,4 +529,66 @@ Each operation creates detailed logs for analysis: +## Action History Tracking + +Track all Stagehand operations with the built-in action history feature. The `stagehand.history` property provides a chronological record of every method call during your automation session. 
+ +### Accessing History Data + +Get a complete history of all Stagehand operations performed in the current session: + + +```typescript TypeScript +import { Stagehand } from "@browserbasehq/stagehand"; + +const stagehand = new Stagehand({ env: "LOCAL" }); +await stagehand.init(); + +// Perform various operations +await stagehand.page.goto("https://example.com"); +await stagehand.page.act("click the login button"); +const userInfo = await stagehand.page.extract("extract user profile data"); +const elements = await stagehand.page.observe("find all navigation links"); + +// Access complete operation history +const history = stagehand.history; +console.log('Total operations:', history.length); + +// Examine individual entries +history.forEach((entry, index) => { + console.log(`Operation ${index + 1}:`, { + method: entry.method, + timestamp: entry.timestamp, + hasResult: entry.result !== null + }); +}); +``` + +```python Python +from stagehand import Stagehand + +stagehand = Stagehand(env="LOCAL") +await stagehand.init() + +# Perform various operations +await stagehand.page.goto("https://example.com") +await stagehand.page.act("click the login button") +user_info = await stagehand.page.extract("extract user profile data") +elements = await stagehand.page.observe("find all navigation links") + +# Access complete operation history +history = stagehand.history +print(f'Total operations: {len(history)}') + +# Examine individual entries +for index, entry in enumerate(history): + print(f"Operation {index + 1}:", { + 'method': entry['method'], + 'timestamp': entry['timestamp'], + 'has_result': entry['result'] is not None + }) +``` + + + For detailed logging and debugging capabilities, see [Logging](/configuration/logging). 
\ No newline at end of file diff --git a/evals/evals.config.json b/evals/evals.config.json index 5c27ebeb7..6b4c02ce6 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -839,6 +839,12 @@ "categories": [ "external_agent_benchmarks" ] + }, + { + "name": "screenshot_cdp_toggle", + "categories": [ + "regression" + ] } ] } \ No newline at end of file diff --git a/evals/tasks/screenshot_cdp_toggle.ts b/evals/tasks/screenshot_cdp_toggle.ts new file mode 100644 index 000000000..a4c923a44 --- /dev/null +++ b/evals/tasks/screenshot_cdp_toggle.ts @@ -0,0 +1,239 @@ +import { EvalFunction } from "@/types/evals"; + +/** + * Test the useCDP flag for screenshot functionality in Browserbase environments. + * This test verifies that: + * 1. Screenshots work with CDP (useCDP: true) + * 2. Screenshots work with Playwright fallback (useCDP: false) + * 3. Options are properly passed through in both modes + */ +export const screenshot_cdp_toggle: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + try { + // Navigate to a test page + await stagehand.page.goto("https://example.com"); + + logger.log({ + message: "Testing screenshot with CDP enabled", + level: 1, + }); + + // Test 1: Screenshot with CDP + const cdpScreenshot = await stagehand.page.screenshot({ + fullPage: true, + useCDP: true, + }); + + if (!cdpScreenshot || cdpScreenshot.length === 0) { + logger.error({ + message: "CDP screenshot failed", + level: 0, + auxiliary: { + size: { + value: cdpScreenshot ? 
cdpScreenshot.length.toString() : "null", + type: "string", + }, + }, + }); + return { + _success: false, + error: "CDP screenshot produced empty result", + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + logger.log({ + message: `CDP screenshot successful: ${cdpScreenshot.length} bytes`, + level: 1, + }); + + logger.log({ + message: "Testing screenshot with Playwright (CDP disabled)", + level: 1, + }); + + // Test 2: Screenshot with Playwright + const playwrightScreenshot = await stagehand.page.screenshot({ + fullPage: true, + useCDP: false, + }); + + if (!playwrightScreenshot || playwrightScreenshot.length === 0) { + logger.error({ + message: "Playwright screenshot failed", + level: 0, + auxiliary: { + size: { + value: playwrightScreenshot + ? playwrightScreenshot.length.toString() + : "null", + type: "string", + }, + }, + }); + return { + _success: false, + error: "Playwright screenshot produced empty result", + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + logger.log({ + message: `Playwright screenshot successful: ${playwrightScreenshot.length} bytes`, + level: 1, + }); + + // Test 3: Test with additional options (JPEG format) + logger.log({ + message: "Testing screenshot with JPEG format and quality settings", + level: 1, + }); + + const jpegScreenshot = await stagehand.page.screenshot({ + type: "jpeg", + quality: 80, + useCDP: false, + }); + + if (!jpegScreenshot || jpegScreenshot.length === 0) { + logger.error({ + message: "JPEG screenshot failed", + level: 0, + }); + return { + _success: false, + error: "JPEG screenshot produced empty result", + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + logger.log({ + message: `JPEG screenshot successful: ${jpegScreenshot.length} bytes`, + level: 1, + }); + + // Test 4: Test with clip option + logger.log({ + message: "Testing screenshot with clip region", + level: 1, + }); + + const clippedScreenshot = await stagehand.page.screenshot({ + clip: { x: 0, y: 0, width: 500, 
height: 300 }, + useCDP: true, + }); + + if (!clippedScreenshot || clippedScreenshot.length === 0) { + logger.error({ + message: "Clipped screenshot failed", + level: 0, + }); + return { + _success: false, + error: "Clipped screenshot produced empty result", + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + // Verify clipped screenshot is smaller than full page + if (clippedScreenshot.length >= cdpScreenshot.length) { + logger.error({ + message: "Clipped screenshot is not smaller than full screenshot", + level: 0, + auxiliary: { + clipped_size: { + value: clippedScreenshot.length.toString(), + type: "integer", + }, + full_size: { + value: cdpScreenshot.length.toString(), + type: "integer", + }, + }, + }); + return { + _success: false, + error: "Clipped screenshot size validation failed", + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + logger.log({ + message: `Clipped screenshot successful: ${clippedScreenshot.length} bytes`, + level: 1, + }); + + logger.log({ + message: "All screenshot tests passed successfully", + level: 0, + auxiliary: { + cdp_size: { + value: cdpScreenshot.length.toString(), + type: "integer", + }, + playwright_size: { + value: playwrightScreenshot.length.toString(), + type: "integer", + }, + jpeg_size: { + value: jpegScreenshot.length.toString(), + type: "integer", + }, + clipped_size: { + value: clippedScreenshot.length.toString(), + type: "integer", + }, + }, + }); + + return { + _success: true, + cdpSize: cdpScreenshot.length, + playwrightSize: playwrightScreenshot.length, + jpegSize: jpegScreenshot.length, + clippedSize: clippedScreenshot.length, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + logger.error({ + message: "Screenshot CDP toggle test failed", + level: 0, + auxiliary: { + error: { + value: error.message || String(error), + type: "string", + }, + stack: { + value: error.stack || "", + type: "string", + }, + }, + }); + + return { + _success: false, + error: 
error.message || String(error), + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await stagehand.close(); + } +}; diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 2f9a1acc9..b67921e26 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -1,7 +1,11 @@ import type { CDPSession, Page as PlaywrightPage, Frame } from "playwright"; import { selectors } from "playwright"; import { z } from "zod/v3"; -import { Page, defaultExtractSchema } from "../types/page"; +import { + Page, + defaultExtractSchema, + StagehandScreenshotOptions, +} from "../types/page"; import { ExtractOptions, ExtractResult, @@ -415,37 +419,41 @@ ${scriptContent} \ } // Handle screenshots with CDP - if (prop === "screenshot" && this.stagehand.env === "BROWSERBASE") { - return async ( - options: { - type?: "png" | "jpeg"; - quality?: number; - fullPage?: boolean; - clip?: { x: number; y: number; width: number; height: number }; - omitBackground?: boolean; - } = {}, - ) => { - const cdpOptions: Record = { - format: options.type === "jpeg" ? "jpeg" : "png", - quality: options.quality, - clip: options.clip, - omitBackground: options.omitBackground, - fromSurface: true, - }; - - if (options.fullPage) { - cdpOptions.captureBeyondViewport = true; - } + if (prop === "screenshot") { + return async (options: StagehandScreenshotOptions = {}) => { + const rawScreenshot: typeof target.screenshot = + Object.getPrototypeOf(target).screenshot.bind(target); + + const { + useCDP = this.stagehand.env === "BROWSERBASE", + ...playwrightOptions + } = options; + + if (useCDP && this.stagehand.env === "BROWSERBASE") { + const cdpOptions: Record = { + format: options.type === "jpeg" ? 
"jpeg" : "png", + quality: options.quality, + clip: options.clip, + omitBackground: options.omitBackground, + fromSurface: true, + }; + + if (options.fullPage) { + cdpOptions.captureBeyondViewport = true; + } - const data = await this.sendCDP<{ data: string }>( - "Page.captureScreenshot", - cdpOptions, - ); + const data = await this.sendCDP<{ data: string }>( + "Page.captureScreenshot", + cdpOptions, + ); - // Convert base64 to buffer - const buffer = Buffer.from(data.data, "base64"); + // Convert base64 to buffer + const buffer = Buffer.from(data.data, "base64"); - return buffer; + return buffer; + } else { + return await rawScreenshot(playwrightOptions); + } }; } diff --git a/lib/agent/tools/act.ts b/lib/agent/tools/act.ts index b2a089192..a5b613050 100644 --- a/lib/agent/tools/act.ts +++ b/lib/agent/tools/act.ts @@ -1,7 +1,8 @@ import { tool } from "ai"; import { z } from "zod/v3"; import { StagehandPage } from "../../StagehandPage"; - +import { buildActObservePrompt } from "../../prompt"; +import { SupportedPlaywrightAction } from "@/types/act"; export const createActTool = ( stagehandPage: StagehandPage, executionModel?: string, @@ -19,37 +20,91 @@ export const createActTool = ( }), execute: async ({ action }) => { try { - let result; - if (executionModel) { - result = await stagehandPage.page.act({ - action, - modelName: executionModel, - }); - } else { - result = await stagehandPage.page.act(action); + const builtPrompt = buildActObservePrompt( + action, + Object.values(SupportedPlaywrightAction), + ); + + const observeOptions = executionModel + ? 
{ + instruction: builtPrompt, + modelName: executionModel, + } + : { + instruction: builtPrompt, + }; + + const observeResults = await stagehandPage.page.observe(observeOptions); + + if (!observeResults || observeResults.length === 0) { + return { + success: false, + error: "No observable actions found for the given instruction", + }; } - const isIframeAction = result.action === "an iframe"; + + const observeResult = observeResults[0]; + + const isIframeAction = observeResult.description === "an iframe"; if (isIframeAction) { - const fallback = await stagehandPage.page.act( - executionModel - ? { action, modelName: executionModel, iframes: true } - : { action, iframes: true }, - ); + const iframeObserveOptions = executionModel + ? { + instruction: builtPrompt, + modelName: executionModel, + iframes: true, + } + : { + instruction: builtPrompt, + iframes: true, + }; + + const iframeObserveResults = + await stagehandPage.page.observe(iframeObserveOptions); + + if (!iframeObserveResults || iframeObserveResults.length === 0) { + return { + success: false, + error: "No observable actions found within iframe context", + isIframe: true, + }; + } + + const iframeObserveResult = iframeObserveResults[0]; + const fallback = await stagehandPage.page.act(iframeObserveResult); + return { success: fallback.success, action: fallback.action, isIframe: true, + playwrightArguments: { + description: iframeObserveResult.description, + method: iframeObserveResult.method, + arguments: iframeObserveResult.arguments, + selector: iframeObserveResult.selector, + }, }; } + const result = await stagehandPage.page.act(observeResult); + const playwrightArguments = { + description: observeResult.description, + method: observeResult.method, + arguments: observeResult.arguments, + selector: observeResult.selector, + }; + return { success: result.success, action: result.action, isIframe: false, + playwrightArguments, }; } catch (error) { - return { success: false, error: error.message }; + return { + 
success: false, + error: error.message, + }; } }, }); diff --git a/lib/handlers/stagehandAgentHandler.ts b/lib/handlers/stagehandAgentHandler.ts index af459f23c..18159987a 100644 --- a/lib/handlers/stagehandAgentHandler.ts +++ b/lib/handlers/stagehandAgentHandler.ts @@ -1,4 +1,9 @@ -import { AgentAction, AgentExecuteOptions, AgentResult } from "@/types/agent"; +import { + AgentAction, + AgentExecuteOptions, + AgentResult, + ActToolResult, +} from "@/types/agent"; import { LogLine } from "@/types/log"; import { StagehandPage } from "../StagehandPage"; import { LLMClient } from "../llm/LLMClient"; @@ -99,7 +104,8 @@ export class StagehandAgentHandler { }); if (event.toolCalls && event.toolCalls.length > 0) { - for (const toolCall of event.toolCalls) { + for (let i = 0; i < event.toolCalls.length; i++) { + const toolCall = event.toolCalls[i]; const args = toolCall.args as Record; if (event.text.length > 0) { @@ -122,6 +128,21 @@ export class StagehandAgentHandler { } } + // Get the tool result if available + const toolResult = event.toolResults?.[i]; + + const getPlaywrightArguments = () => { + if (toolCall.toolName !== "act" || !toolResult) { + return {}; + } + const result = toolResult.result as ActToolResult; + if (result && result.playwrightArguments) { + return { playwrightArguments: result.playwrightArguments }; + } + + return {}; + }; + const action: AgentAction = { type: toolCall.toolName, reasoning: event.text || undefined, @@ -130,6 +151,7 @@ export class StagehandAgentHandler { ? (args?.taskComplete as boolean) : false, ...args, + ...getPlaywrightArguments(), }; actions.push(action); diff --git a/lib/index.ts b/lib/index.ts index 04cd4a5bf..7a1a95470 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -924,6 +924,21 @@ export class Stagehand { "MCP integrations are an experimental feature. 
Please enable experimental mode by setting experimental: true in the Stagehand constructor params.", ); } + + const executeOptions: AgentExecuteOptions = + typeof instructionOrOptions === "string" + ? { instruction: instructionOrOptions } + : instructionOrOptions; + + if (this.usingAPI) { + const agentConfigForApi: AgentConfig = options; + + return await this.apiClient.agentExecute( + agentConfigForApi, + executeOptions, + ); + } + const tools = options?.integrations ? await resolveTools(options?.integrations, options?.tools) : (options?.tools ?? {}); @@ -934,7 +949,7 @@ export class Stagehand { executionModel, systemInstructions, tools, - ).execute(instructionOrOptions); + ).execute(executeOptions); }, }; } @@ -1028,4 +1043,5 @@ export * from "../types/stagehand"; export * from "../types/stagehandApiErrors"; export * from "../types/stagehandErrors"; export * from "./llm/LLMClient"; +export * from "./llm/aisdk"; export { connectToMCPServer }; diff --git a/package.json b/package.json index 39eb60d2a..226b708a3 100644 --- a/package.json +++ b/package.json @@ -71,7 +71,7 @@ "peerDependencies": { "deepmerge": "^4.3.1", "dotenv": "^16.4.5", - "zod": ">=3.25.0 <4.1.0" + "zod": ">=3.25.0 <3.25.68" }, "dependencies": { "@anthropic-ai/sdk": "0.39.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 79884d2d4..ad8e12d5f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -51,7 +51,7 @@ importers: specifier: ^8.18.0 version: 8.18.1 zod: - specifier: '>=3.25.0 <4.1.0' + specifier: '>=3.25.0 <3.25.68' version: 3.25.67 zod-to-json-schema: specifier: ^3.23.5 @@ -5816,10 +5816,10 @@ snapshots: '@ark/util@0.46.0': {} - '@asteasolutions/zod-to-openapi@6.4.0(zod@3.25.67)': + '@asteasolutions/zod-to-openapi@6.4.0(zod@3.25.76)': dependencies: openapi3-ts: 4.4.0 - zod: 3.25.67 + zod: 3.25.76 '@asyncapi/parser@3.4.0': dependencies: @@ -5872,15 +5872,15 @@ snapshots: '@braintrust/core@0.0.34': dependencies: - '@asteasolutions/zod-to-openapi': 6.4.0(zod@3.25.67) + 
'@asteasolutions/zod-to-openapi': 6.4.0(zod@3.25.76) uuid: 9.0.1 - zod: 3.25.67 + zod: 3.25.76 '@braintrust/core@0.0.67': dependencies: - '@asteasolutions/zod-to-openapi': 6.4.0(zod@3.25.67) + '@asteasolutions/zod-to-openapi': 6.4.0(zod@3.25.76) uuid: 9.0.1 - zod: 3.25.67 + zod: 3.25.76 '@browserbasehq/sdk@2.5.0': dependencies: @@ -6596,8 +6596,8 @@ snapshots: p-queue: 6.6.2 p-retry: 4.6.2 uuid: 10.0.0 - zod: 3.25.67 - zod-to-json-schema: 3.24.5(zod@3.25.67) + zod: 3.25.76 + zod-to-json-schema: 3.24.5(zod@3.25.76) transitivePeerDependencies: - openai @@ -6605,9 +6605,9 @@ snapshots: dependencies: '@langchain/core': 0.3.50(openai@4.96.2(ws@8.18.1)(zod@3.25.67)) js-tiktoken: 1.0.20 - openai: 4.96.2(ws@8.18.1)(zod@3.25.67) - zod: 3.25.67 - zod-to-json-schema: 3.24.5(zod@3.25.67) + openai: 4.96.2(ws@8.18.1)(zod@3.25.76) + zod: 3.25.76 + zod-to-json-schema: 3.24.5(zod@3.25.76) transitivePeerDependencies: - encoding - ws @@ -7702,8 +7702,8 @@ snapshots: linear-sum-assignment: 1.0.7 mustache: 4.2.0 openai: 4.23.0 - zod: 3.25.67 - zod-to-json-schema: 3.24.5(zod@3.25.67) + zod: 3.25.76 + zod-to-json-schema: 3.24.5(zod@3.25.76) transitivePeerDependencies: - encoding @@ -10581,6 +10581,21 @@ snapshots: transitivePeerDependencies: - encoding + openai@4.96.2(ws@8.18.1)(zod@3.25.76): + dependencies: + '@types/node': 18.19.87 + '@types/node-fetch': 2.6.12 + abort-controller: 3.0.0 + agentkeepalive: 4.6.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + optionalDependencies: + ws: 8.18.1 + zod: 3.25.76 + transitivePeerDependencies: + - encoding + openapi-types@12.1.3: {} openapi3-ts@4.4.0: diff --git a/types/agent.ts b/types/agent.ts index 7bcea4992..9be344f0f 100644 --- a/types/agent.ts +++ b/types/agent.ts @@ -1,4 +1,13 @@ import { LogLine } from "./log"; +import { ObserveResult } from "./stagehand"; + +export interface ActToolResult { + success: boolean; + action?: string; + error?: string; + isIframe?: boolean; + playwrightArguments?: ObserveResult | 
null; +} export interface AgentAction { type: string; @@ -10,6 +19,7 @@ export interface AgentAction { pageText?: string; // ariaTree tool pageUrl?: string; // ariaTree tool instruction?: string; // various tools + playwrightArguments?: ObserveResult | null; // act tool [key: string]: unknown; } diff --git a/types/page.ts b/types/page.ts index 4f93b1fa5..de859efe6 100644 --- a/types/page.ts +++ b/types/page.ts @@ -2,6 +2,7 @@ import type { Browser as PlaywrightBrowser, BrowserContext as PlaywrightContext, Page as PlaywrightPage, + PageScreenshotOptions, } from "playwright"; import { z } from "zod/v3"; import type { @@ -21,7 +22,12 @@ export const pageTextSchema = z.object({ page_text: z.string(), }); -export interface Page extends Omit { +export interface StagehandScreenshotOptions extends PageScreenshotOptions { + /** Whether to capture the screenshot via CDP instead of Playwright. Only honored when env is "BROWSERBASE", where it defaults to true; in any other environment Playwright is always used. */ + useCDP?: boolean; +} + +export interface Page extends Omit { act(action: string): Promise; act(options: ActOptions): Promise; act(observation: ObserveResult): Promise; @@ -38,6 +44,8 @@ export interface Page extends Omit { observe(instruction: string): Promise; observe(options?: ObserveOptions): Promise; + screenshot(options?: StagehandScreenshotOptions): Promise; + on: { (event: "popup", listener: (page: Page) => unknown): Page; } & PlaywrightPage["on"];