From 555f54c274e3c0e5050b734abc2f9680e678d65d Mon Sep 17 00:00:00 2001 From: Miguel Date: Tue, 21 Jan 2025 14:13:50 -0800 Subject: [PATCH 01/23] flagged return action v0.1 --- lib/StagehandPage.ts | 6 ++++++ lib/handlers/observeHandler.ts | 5 ++++- lib/inference.ts | 33 +++++++++++++++++++++++++++++---- types/stagehand.ts | 5 +++++ 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 21e586f4..9149d884 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -469,6 +469,7 @@ export class StagehandPage { useVision = false, domSettleTimeoutMs, useAccessibilityTree = false, + returnAction = false, } = options; const requestId = Math.random().toString(36).substring(2); @@ -497,6 +498,10 @@ export class StagehandPage { value: useAccessibilityTree ? "true" : "false", type: "boolean", }, + returnAction: { + value: returnAction ? "true" : "false", + type: "boolean", + }, }, }); @@ -509,6 +514,7 @@ export class StagehandPage { requestId, domSettleTimeoutMs, useAccessibilityTree, + returnAction, }) .catch((e) => { this.stagehand.log({ diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 5dfb3552..c5fc493d 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -58,6 +58,7 @@ export class StagehandObserveHandler { llmClient, requestId, useAccessibilityTree = false, + returnAction = false, }: { instruction: string; useVision: boolean; @@ -66,6 +67,7 @@ export class StagehandObserveHandler { requestId: string; domSettleTimeoutMs?: number; useAccessibilityTree?: boolean; + returnAction?: boolean; }) { if (!instruction) { instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. 
Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`; @@ -180,6 +182,7 @@ export class StagehandObserveHandler { userProvidedInstructions: this.userProvidedInstructions, logger: this.logger, isUsingAccessibilityTree: useAccessibilityTree, + returnAction, }); const elementsWithSelectors = await Promise.all( observationResponse.elements.map(async (element) => { @@ -202,7 +205,7 @@ export class StagehandObserveHandler { ); return { ...rest, - selector: xpath, + selector: `xpath=${xpath}`, backendNodeId: elementId, }; } diff --git a/lib/inference.ts b/lib/inference.ts index f23db9e2..358020e3 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -287,6 +287,7 @@ export async function observe({ isUsingAccessibilityTree, userProvidedInstructions, logger, + returnAction = false, }: { instruction: string; domElements: string; @@ -296,6 +297,7 @@ export async function observe({ userProvidedInstructions?: string; logger: (message: LogLine) => void; isUsingAccessibilityTree?: boolean; + returnAction?: boolean; }) { const observeSchema = z.object({ elements: z @@ -309,6 +311,21 @@ export async function observe({ ? "a description of the accessible element and its purpose" : "a description of the element and what it is relevant for", ), + ...(returnAction + ? { + method: z + .string() + .describe( + "the candidate method/action to interact with the element. Select one of the available Playwright interaction methods.", + ), + arguments: z + .array( + z.string().describe( + "the arguments to pass to the method. For example, for a click, the arguments are empty, but for a fill, the arguments are the value to fill in.", + ) + ) + } + : {}), }), ) .describe( @@ -351,10 +368,18 @@ export async function observe({ }); const parsedResponse = { elements: - observationResponse.elements?.map((el) => ({ - elementId: Number(el.elementId), - description: String(el.description), - })) ?? 
[], + observationResponse.elements?.map((el) => { + const base = { + elementId: Number(el.elementId), + description: String(el.description), + }; + + return returnAction ? { + ...base, + method: String(el.method), + arguments: el.arguments, + } : base; + }) ?? [], } satisfies { elements: { elementId: number; description: string }[] }; return parsedResponse; diff --git a/types/stagehand.ts b/types/stagehand.ts index f47378b0..0f84177f 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -88,9 +88,14 @@ export interface ObserveOptions { useVision?: boolean; domSettleTimeoutMs?: number; useAccessibilityTree?: boolean; + returnAction?: boolean; } export interface ObserveResult { selector: string; description: string; + backendNodeId: number; + //TODO: review name + method?: string; + arguments?: any[]; } From d64a1a8bbcf1c52e22234e7db0404e209cc88c30 Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 22 Jan 2025 17:03:07 -0800 Subject: [PATCH 02/23] isVisible flag added to observe --- lib/StagehandPage.ts | 2 + lib/a11y/utils.ts | 321 ++++++++++++++++++++++++++++++++- lib/handlers/observeHandler.ts | 33 ++-- lib/inference.ts | 25 +-- types/stagehand.ts | 5 +- 5 files changed, 361 insertions(+), 25 deletions(-) diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 9149d884..4e168cb5 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -470,6 +470,7 @@ export class StagehandPage { domSettleTimeoutMs, useAccessibilityTree = false, returnAction = false, + visibleElements = false, } = options; const requestId = Math.random().toString(36).substring(2); @@ -515,6 +516,7 @@ export class StagehandPage { domSettleTimeoutMs, useAccessibilityTree, returnAction, + visibleElements, }) .catch((e) => { this.stagehand.log({ diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 4d710c6d..6ac6178e 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -1,7 +1,8 @@ import { AccessibilityNode, TreeResult, AXNode } from "../../types/context"; import { 
StagehandPage } from "../StagehandPage"; import { LogLine } from "../../types/log"; -import { CDPSession } from "playwright"; +import { CDPSession, Page, Locator } from "playwright"; +import { PlaywrightCommandMethodNotSupportedException, PlaywrightCommandException } from "@/types/playwright"; // Parser function for str output export function formatSimplifiedTree( @@ -218,3 +219,321 @@ export async function getXPathByResolvedObjectId( return result.value || ""; } + +export async function performPlaywrightMethod( + stagehandPage: StagehandPage, + logger: (logLine: LogLine) => void, + method: string, + args: unknown[], + xpath: string, + domSettleTimeoutMs?: number, +) { + const locator = stagehandPage.page.locator(`xpath=${xpath}`).first(); + const initialUrl = stagehandPage.page.url(); + + logger({ + category: "action", + message: "performing playwright method", + level: 2, + auxiliary: { + xpath: { + value: xpath, + type: "string", + }, + method: { + value: method, + type: "string", + }, + }, + }); + + if (method === "scrollIntoView") { + logger({ + category: "action", + message: "scrolling element into view", + level: 2, + auxiliary: { + xpath: { + value: xpath, + type: "string", + }, + }, + }); + try { + await locator + .evaluate((element: HTMLElement) => { + element.scrollIntoView({ behavior: "smooth", block: "center" }); + }) + .catch((e: Error) => { + logger({ + category: "action", + message: "error scrolling element into view", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + trace: { + value: e.stack, + type: "string", + }, + xpath: { + value: xpath, + type: "string", + }, + }, + }); + }); + } catch (e) { + logger({ + category: "action", + message: "error scrolling element into view", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + trace: { + value: e.stack, + type: "string", + }, + xpath: { + value: xpath, + type: "string", + }, + }, + }); + + throw new 
PlaywrightCommandException(e.message); + } + } else if (method === "fill" || method === "type") { + try { + await locator.fill(""); + await locator.click(); + const text = args[0]?.toString(); + for (const char of text) { + await stagehandPage.page.keyboard.type(char, { + delay: Math.random() * 50 + 25, + }); + } + } catch (e) { + logger({ + category: "action", + message: "error filling element", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + trace: { + value: e.stack, + type: "string", + }, + xpath: { + value: xpath, + type: "string", + }, + }, + }); + + throw new PlaywrightCommandException(e.message); + } + } else if (method === "press") { + try { + const key = args[0]?.toString(); + await stagehandPage.page.keyboard.press(key); + } catch (e) { + logger({ + category: "action", + message: "error pressing key", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + trace: { + value: e.stack, + type: "string", + }, + key: { + value: args[0]?.toString() ?? 
"unknown", + type: "string", + }, + }, + }); + + throw new PlaywrightCommandException(e.message); + } + } else if (typeof locator[method as keyof typeof locator] === "function") { + // Log current URL before action + logger({ + category: "action", + message: "page URL before action", + level: 2, + auxiliary: { + url: { + value: stagehandPage.page.url(), + type: "string", + }, + }, + }); + + // Perform the action + try { + await ( + locator[method as keyof Locator] as unknown as ( + ...args: string[] + ) => Promise + )(...args.map((arg) => arg?.toString() || "")); + } catch (e) { + logger({ + category: "action", + message: "error performing method", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + trace: { + value: e.stack, + type: "string", + }, + xpath: { + value: xpath, + type: "string", + }, + method: { + value: method, + type: "string", + }, + args: { + value: JSON.stringify(args), + type: "object", + }, + }, + }); + + throw new PlaywrightCommandException(e.message); + } + + // Handle navigation if a new page is opened + if (method === "click") { + logger({ + category: "action", + message: "clicking element, checking for page navigation", + level: 1, + auxiliary: { + xpath: { + value: xpath, + type: "string", + }, + }, + }); + + // NAVIDNOTE: Should this happen before we wait for locator[method]? + const newOpenedTab = await Promise.race([ + new Promise((resolve) => { + // TODO: This is a hack to get the new page + // We should find a better way to do this + stagehandPage.context.once("page", (page) => resolve(page)); + setTimeout(() => resolve(null), 1_500); + }), + ]); + + logger({ + category: "action", + message: "clicked element", + level: 1, + auxiliary: { + newOpenedTab: { + value: newOpenedTab ? 
"opened a new tab" : "no new tabs opened", + type: "string", + }, + }, + }); + + if (newOpenedTab) { + logger({ + category: "action", + message: "new page detected (new tab) with URL", + level: 1, + auxiliary: { + url: { + value: newOpenedTab.url(), + type: "string", + }, + }, + }); + await newOpenedTab.close(); + await stagehandPage.page.goto(newOpenedTab.url()); + await stagehandPage.page.waitForLoadState("domcontentloaded"); + await stagehandPage._waitForSettledDom(domSettleTimeoutMs); + } + + await Promise.race([ + stagehandPage.page.waitForLoadState("networkidle"), + new Promise((resolve) => setTimeout(resolve, 5_000)), + ]).catch((e) => { + logger({ + category: "action", + message: "network idle timeout hit", + level: 1, + auxiliary: { + trace: { + value: e.stack, + type: "string", + }, + message: { + value: e.message, + type: "string", + }, + }, + }); + }); + + logger({ + category: "action", + message: "finished waiting for (possible) page navigation", + level: 1, + }); + + if (stagehandPage.page.url() !== initialUrl) { + logger({ + category: "action", + message: "new page detected with URL", + level: 1, + auxiliary: { + url: { + value: stagehandPage.page.url(), + type: "string", + }, + }, + }); + } + } + } else { + logger({ + category: "action", + message: "chosen method is invalid", + level: 1, + auxiliary: { + method: { + value: method, + type: "string", + }, + }, + }); + + throw new PlaywrightCommandMethodNotSupportedException( + `Method ${method} not supported`, + ); + } + + await stagehandPage._waitForSettledDom(domSettleTimeoutMs); +} + diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index c5fc493d..ce9a90e7 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -9,6 +9,7 @@ import { getAccessibilityTree, getXPathByResolvedObjectId, } from "../a11y/utils"; +import fs from "fs"; export class StagehandObserveHandler { private readonly stagehand: Stagehand; @@ -57,8 +58,9 @@ export class 
StagehandObserveHandler { fullPage, llmClient, requestId, - useAccessibilityTree = false, - returnAction = false, + useAccessibilityTree, + returnAction, + visibleElements, }: { instruction: string; useVision: boolean; @@ -68,6 +70,7 @@ export class StagehandObserveHandler { domSettleTimeoutMs?: number; useAccessibilityTree?: boolean; returnAction?: boolean; + visibleElements?: boolean; }) { if (!instruction) { instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`; @@ -95,6 +98,8 @@ export class StagehandObserveHandler { return window.processAllOfDom().then((result) => result); }); + fs.writeFileSync("../output_substring.txt", evalResult.outputString); + // For each element in the selector map, get its backendNodeId for (const [index, xpaths] of Object.entries(evalResult.selectorMap)) { try { @@ -193,6 +198,10 @@ export class StagehandObserveHandler { ([, value]) => value === elementId, )?.[0]; if (!index || !selectorMap[index]?.[0]) { + // If visibleElements flag is true only return elements that are found in selectorMap + if (visibleElements) { + return null; + } // Generate xpath for the given element if not found in selectorMap const { object } = await this.stagehandPage.sendCDP<{ object: { objectId: string }; @@ -206,24 +215,26 @@ export class StagehandObserveHandler { return { ...rest, selector: `xpath=${xpath}`, - backendNodeId: elementId, + // backendNodeId: elementId, }; } return { ...rest, selector: `xpath=${selectorMap[index][0]}`, - backendNodeId: elementId, + // backendNodeId: elementId, }; } return { ...rest, selector: `xpath=${selectorMap[elementId][0]}`, - backendNodeId: backendNodeIdMap[elementId], + // backendNodeId: backendNodeIdMap[elementId], }; - }), + }) + ); + const filteredElements = 
elementsWithSelectors.filter( + (element): element is NonNullable => element !== null, ); - await this.stagehandPage.cleanupDomDebug(); this.logger({ @@ -232,13 +243,13 @@ export class StagehandObserveHandler { level: 1, auxiliary: { elements: { - value: JSON.stringify(elementsWithSelectors), + value: JSON.stringify(filteredElements), type: "object", }, }, }); - - await this._recordObservation(instruction, elementsWithSelectors); - return elementsWithSelectors; + + await this._recordObservation(instruction, filteredElements); + return filteredElements; } } diff --git a/lib/inference.ts b/lib/inference.ts index 358020e3..93f02d1a 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -318,12 +318,13 @@ export async function observe({ .describe( "the candidate method/action to interact with the element. Select one of the available Playwright interaction methods.", ), - arguments: z - .array( - z.string().describe( + arguments: z.array( + z + .string() + .describe( "the arguments to pass to the method. For example, for a click, the arguments are empty, but for a fill, the arguments are the value to fill in.", - ) - ) + ), + ), } : {}), }), @@ -373,12 +374,14 @@ export async function observe({ elementId: Number(el.elementId), description: String(el.description), }; - - return returnAction ? { - ...base, - method: String(el.method), - arguments: el.arguments, - } : base; + + return returnAction + ? { + ...base, + method: String(el.method), + arguments: el.arguments, + } + : base; }) ?? 
[], } satisfies { elements: { elementId: number; description: string }[] }; diff --git a/types/stagehand.ts b/types/stagehand.ts index 0f84177f..05f244f3 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -89,13 +89,14 @@ export interface ObserveOptions { domSettleTimeoutMs?: number; useAccessibilityTree?: boolean; returnAction?: boolean; + visibleElements?: boolean; } export interface ObserveResult { selector: string; description: string; - backendNodeId: number; + backendNodeId?: number; //TODO: review name method?: string; - arguments?: any[]; + arguments?: string[]; } From cd8a4f2f74a3f4d3e82a8a08355d8f8cfbb4fa06 Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 22 Jan 2025 18:13:45 -0800 Subject: [PATCH 03/23] prettier and cleanup of fs functions --- lib/a11y/utils.ts | 6 ++++-- lib/handlers/observeHandler.ts | 7 ++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 6ac6178e..ca5a7479 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -2,7 +2,10 @@ import { AccessibilityNode, TreeResult, AXNode } from "../../types/context"; import { StagehandPage } from "../StagehandPage"; import { LogLine } from "../../types/log"; import { CDPSession, Page, Locator } from "playwright"; -import { PlaywrightCommandMethodNotSupportedException, PlaywrightCommandException } from "@/types/playwright"; +import { + PlaywrightCommandMethodNotSupportedException, + PlaywrightCommandException, +} from "@/types/playwright"; // Parser function for str output export function formatSimplifiedTree( @@ -536,4 +539,3 @@ export async function performPlaywrightMethod( await stagehandPage._waitForSettledDom(domSettleTimeoutMs); } - diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index ce9a90e7..9d4bbaa7 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -9,7 +9,6 @@ import { getAccessibilityTree, getXPathByResolvedObjectId, } from "../a11y/utils"; -import fs from 
"fs"; export class StagehandObserveHandler { private readonly stagehand: Stagehand; @@ -98,8 +97,6 @@ export class StagehandObserveHandler { return window.processAllOfDom().then((result) => result); }); - fs.writeFileSync("../output_substring.txt", evalResult.outputString); - // For each element in the selector map, get its backendNodeId for (const [index, xpaths] of Object.entries(evalResult.selectorMap)) { try { @@ -230,7 +227,7 @@ export class StagehandObserveHandler { selector: `xpath=${selectorMap[elementId][0]}`, // backendNodeId: backendNodeIdMap[elementId], }; - }) + }), ); const filteredElements = elementsWithSelectors.filter( (element): element is NonNullable => element !== null, @@ -248,7 +245,7 @@ export class StagehandObserveHandler { }, }, }); - + await this._recordObservation(instruction, filteredElements); return filteredElements; } From 19f92aa6dd7553b1f0eef3cc2ebbb3799a0469d2 Mon Sep 17 00:00:00 2001 From: Miguel Date: Mon, 27 Jan 2025 10:46:44 -0800 Subject: [PATCH 04/23] simplifying observe flagging --- lib/StagehandPage.ts | 15 ++---- lib/handlers/observeHandler.ts | 88 ++++++++++++++++------------------ types/stagehand.ts | 1 - 3 files changed, 44 insertions(+), 60 deletions(-) diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 4e168cb5..816e4379 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -463,13 +463,12 @@ export class StagehandPage { : instructionOrOptions || {}; const { - instruction = "Find actions that can be performed on this page.", + instruction, modelName, modelClientOptions, useVision = false, domSettleTimeoutMs, - useAccessibilityTree = false, - returnAction = false, + returnAction = true, visibleElements = false, } = options; @@ -495,12 +494,8 @@ export class StagehandPage { value: llmClient.modelName, type: "string", }, - useAccessibilityTree: { - value: useAccessibilityTree ? "true" : "false", - type: "boolean", - }, - returnAction: { - value: returnAction ? 
"true" : "false", + visibleElements: { + value: visibleElements ? "true" : "false", type: "boolean", }, }, @@ -511,10 +506,8 @@ export class StagehandPage { instruction, llmClient, useVision, - fullPage: false, requestId, domSettleTimeoutMs, - useAccessibilityTree, returnAction, visibleElements, }) diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 9d4bbaa7..dc42ed77 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -54,20 +54,16 @@ export class StagehandObserveHandler { public async observe({ instruction, useVision, - fullPage, llmClient, requestId, - useAccessibilityTree, returnAction, visibleElements, }: { instruction: string; useVision: boolean; - fullPage: boolean; llmClient: LLMClient; requestId: string; domSettleTimeoutMs?: number; - useAccessibilityTree?: boolean; returnAction?: boolean; visibleElements?: boolean; }) { @@ -90,60 +86,56 @@ export class StagehandObserveHandler { let selectorMap: Record = {}; const backendNodeIdMap: Record = {}; - await this.stagehandPage.startDomDebug(); - await this.stagehandPage.enableCDP("DOM"); - const evalResult = await this.stagehand.page.evaluate(() => { return window.processAllOfDom().then((result) => result); }); - - // For each element in the selector map, get its backendNodeId - for (const [index, xpaths] of Object.entries(evalResult.selectorMap)) { - try { - // Use the first xpath to find the element - const xpath = xpaths[0]; - const { result } = await this.stagehandPage.sendCDP<{ - result: { objectId: string }; - }>("Runtime.evaluate", { - expression: `document.evaluate('${xpath}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue`, - returnByValue: false, - }); - - if (result.objectId) { - // Get the node details using CDP - const { node } = await this.stagehandPage.sendCDP<{ - node: { backendNodeId: number }; - }>("DOM.describeNode", { - objectId: result.objectId, - depth: -1, - pierce: true, + ({ outputString, 
selectorMap } = evalResult); + + if (!visibleElements) { + await this.stagehandPage.startDomDebug(); + await this.stagehandPage.enableCDP("DOM"); + + // For each element in the selector map, get its backendNodeId + for (const [index, xpaths] of Object.entries(evalResult.selectorMap)) { + try { + // Use the first xpath to find the element + const xpath = xpaths[0]; + const { result } = await this.stagehandPage.sendCDP<{ + result: { objectId: string }; + }>("Runtime.evaluate", { + expression: `document.evaluate('${xpath}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue`, + returnByValue: false, }); - if (node.backendNodeId) { - backendNodeIdMap[index] = node.backendNodeId; + if (result.objectId) { + // Get the node details using CDP + const { node } = await this.stagehandPage.sendCDP<{ + node: { backendNodeId: number }; + }>("DOM.describeNode", { + objectId: result.objectId, + depth: -1, + pierce: true, + }); + + if (node.backendNodeId) { + backendNodeIdMap[index] = node.backendNodeId; + } } + } catch (error) { + console.warn( + `Failed to get backendNodeId for element ${index}:`, + error, + ); + continue; } - } catch (error) { - console.warn( - `Failed to get backendNodeId for element ${index}:`, - error, - ); - continue; } - } - - await this.stagehandPage.disableCDP("DOM"); - ({ outputString, selectorMap } = evalResult); - - if (useAccessibilityTree) { + await this.stagehandPage.disableCDP("DOM"); const tree = await getAccessibilityTree(this.stagehandPage, this.logger); - this.logger({ category: "observation", message: "Getting accessibility tree data", level: 1, }); - outputString = tree.simplified; } @@ -170,11 +162,11 @@ export class StagehandObserveHandler { ); annotatedScreenshot = - await screenshotService.getAnnotatedScreenshot(fullPage); + await screenshotService.getAnnotatedScreenshot(true); outputString = "n/a. 
use the image to find the elements."; } } - + const observationResponse = await observe({ instruction, domElements: outputString, @@ -183,14 +175,14 @@ export class StagehandObserveHandler { requestId, userProvidedInstructions: this.userProvidedInstructions, logger: this.logger, - isUsingAccessibilityTree: useAccessibilityTree, + isUsingAccessibilityTree: !visibleElements, returnAction, }); const elementsWithSelectors = await Promise.all( observationResponse.elements.map(async (element) => { const { elementId, ...rest } = element; - if (useAccessibilityTree) { + if (!visibleElements) { const index = Object.entries(backendNodeIdMap).find( ([, value]) => value === elementId, )?.[0]; diff --git a/types/stagehand.ts b/types/stagehand.ts index 05f244f3..5c00732c 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -87,7 +87,6 @@ export interface ObserveOptions { modelClientOptions?: ClientOptions; useVision?: boolean; domSettleTimeoutMs?: number; - useAccessibilityTree?: boolean; returnAction?: boolean; visibleElements?: boolean; } From 4b8c7281a07d6ac6d4f8fab10e870541fae5fba0 Mon Sep 17 00:00:00 2001 From: Miguel Date: Mon, 27 Jan 2025 13:25:53 -0800 Subject: [PATCH 05/23] two flags: useAccessibilityTree and returnAction. No more processAllOfDom on a11y context --- lib/StagehandPage.ts | 8 +- lib/handlers/observeHandler.ts | 181 ++++++++++++++++----------------- types/stagehand.ts | 2 +- 3 files changed, 93 insertions(+), 98 deletions(-) diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 816e4379..81cef3b4 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -469,7 +469,7 @@ export class StagehandPage { useVision = false, domSettleTimeoutMs, returnAction = true, - visibleElements = false, + useAccessibilityTree = true, } = options; const requestId = Math.random().toString(36).substring(2); @@ -494,8 +494,8 @@ export class StagehandPage { value: llmClient.modelName, type: "string", }, - visibleElements: { - value: visibleElements ? 
"true" : "false", + useAccessibilityTree: { + value: useAccessibilityTree ? "true" : "false", type: "boolean", }, }, @@ -509,7 +509,7 @@ export class StagehandPage { requestId, domSettleTimeoutMs, returnAction, - visibleElements, + useAccessibilityTree, }) .catch((e) => { this.stagehand.log({ diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index dc42ed77..d51eca86 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -57,7 +57,7 @@ export class StagehandObserveHandler { llmClient, requestId, returnAction, - visibleElements, + useAccessibilityTree, }: { instruction: string; useVision: boolean; @@ -65,7 +65,7 @@ export class StagehandObserveHandler { requestId: string; domSettleTimeoutMs?: number; returnAction?: boolean; - visibleElements?: boolean; + useAccessibilityTree?: boolean; }) { if (!instruction) { instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. 
Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`; @@ -82,54 +82,49 @@ export class StagehandObserveHandler { }, }); - let outputString: string; - let selectorMap: Record = {}; const backendNodeIdMap: Record = {}; + let selectorMap: Record = {}; + let outputString: string; - const evalResult = await this.stagehand.page.evaluate(() => { - return window.processAllOfDom().then((result) => result); - }); - ({ outputString, selectorMap } = evalResult); - - if (!visibleElements) { - await this.stagehandPage.startDomDebug(); - await this.stagehandPage.enableCDP("DOM"); - - // For each element in the selector map, get its backendNodeId - for (const [index, xpaths] of Object.entries(evalResult.selectorMap)) { - try { - // Use the first xpath to find the element - const xpath = xpaths[0]; - const { result } = await this.stagehandPage.sendCDP<{ - result: { objectId: string }; - }>("Runtime.evaluate", { - expression: `document.evaluate('${xpath}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue`, - returnByValue: false, - }); - - if (result.objectId) { - // Get the node details using CDP - const { node } = await this.stagehandPage.sendCDP<{ - node: { backendNodeId: number }; - }>("DOM.describeNode", { - objectId: result.objectId, - depth: -1, - pierce: true, - }); - - if (node.backendNodeId) { - backendNodeIdMap[index] = node.backendNodeId; - } - } - } catch (error) { - console.warn( - `Failed to get backendNodeId for element ${index}:`, - error, - ); - continue; - } - } - await this.stagehandPage.disableCDP("DOM"); + if (useAccessibilityTree) { + // await this.stagehandPage.startDomDebug(); + // await this.stagehandPage.enableCDP("DOM"); + + // // For each element in the selector map, get its backendNodeId + // for (const [index, xpaths] of Object.entries(evalResult.selectorMap)) { + // try { + // // Use the first xpath to find the element + // const xpath = xpaths[0]; + // const { result 
} = await this.stagehandPage.sendCDP<{ + // result: { objectId: string }; + // }>("Runtime.evaluate", { + // expression: `document.evaluate('${xpath}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue`, + // returnByValue: false, + // }); + + // if (result.objectId) { + // // Get the node details using CDP + // const { node } = await this.stagehandPage.sendCDP<{ + // node: { backendNodeId: number }; + // }>("DOM.describeNode", { + // objectId: result.objectId, + // depth: -1, + // pierce: true, + // }); + + // if (node.backendNodeId) { + // backendNodeIdMap[index] = node.backendNodeId; + // } + // } + // } catch (error) { + // console.warn( + // `Failed to get backendNodeId for element ${index}:`, + // error, + // ); + // continue; + // } + // } + // await this.stagehandPage.disableCDP("DOM"); const tree = await getAccessibilityTree(this.stagehandPage, this.logger); this.logger({ category: "observation", @@ -137,6 +132,11 @@ export class StagehandObserveHandler { level: 1, }); outputString = tree.simplified; + } else { + const evalResult = await this.stagehand.page.evaluate(() => { + return window.processAllOfDom().then((result) => result); + }); + ({ outputString, selectorMap } = evalResult); } let annotatedScreenshot: Buffer | undefined; @@ -166,7 +166,7 @@ export class StagehandObserveHandler { outputString = "n/a. 
use the image to find the elements."; } } - + console.time("LLM inference"); const observationResponse = await observe({ instruction, domElements: outputString, @@ -175,43 +175,41 @@ export class StagehandObserveHandler { requestId, userProvidedInstructions: this.userProvidedInstructions, logger: this.logger, - isUsingAccessibilityTree: !visibleElements, + isUsingAccessibilityTree: useAccessibilityTree, returnAction, }); + console.timeEnd("LLM inference"); const elementsWithSelectors = await Promise.all( observationResponse.elements.map(async (element) => { const { elementId, ...rest } = element; - if (!visibleElements) { - const index = Object.entries(backendNodeIdMap).find( - ([, value]) => value === elementId, - )?.[0]; - if (!index || !selectorMap[index]?.[0]) { - // If visibleElements flag is true only return elements that are found in selectorMap - if (visibleElements) { - return null; - } - // Generate xpath for the given element if not found in selectorMap - const { object } = await this.stagehandPage.sendCDP<{ - object: { objectId: string }; - }>("DOM.resolveNode", { - backendNodeId: elementId, - }); - const xpath = await getXPathByResolvedObjectId( - await this.stagehandPage.getCDPClient(), - object.objectId, - ); - return { - ...rest, - selector: `xpath=${xpath}`, - // backendNodeId: elementId, - }; - } + if (useAccessibilityTree) { + // const index = Object.entries(backendNodeIdMap).find( + // ([, value]) => value === elementId, + // )?.[0]; + // if (!index || !selectorMap[index]?.[0]) { + + // Generate xpath for the given element if not found in selectorMap + const { object } = await this.stagehandPage.sendCDP<{ + object: { objectId: string }; + }>("DOM.resolveNode", { + backendNodeId: elementId, + }); + const xpath = await getXPathByResolvedObjectId( + await this.stagehandPage.getCDPClient(), + object.objectId, + ); return { ...rest, - selector: `xpath=${selectorMap[index][0]}`, + selector: `xpath=${xpath}`, // backendNodeId: elementId, }; + // } + // 
return { + // ...rest, + // selector: `xpath=${selectorMap[index][0]}`, + // // backendNodeId: elementId, + // }; } return { @@ -221,24 +219,21 @@ export class StagehandObserveHandler { }; }), ); - const filteredElements = elementsWithSelectors.filter( - (element): element is NonNullable => element !== null, - ); await this.stagehandPage.cleanupDomDebug(); - this.logger({ - category: "observation", - message: "found elements", - level: 1, - auxiliary: { - elements: { - value: JSON.stringify(filteredElements), - type: "object", - }, - }, - }); - - await this._recordObservation(instruction, filteredElements); - return filteredElements; + // this.logger({ + // category: "observation", + // message: "found elements", + // level: 1, + // auxiliary: { + // elements: { + // value: JSON.stringify(elementsWithSelectors), + // type: "object", + // }, + // }, + // }); + + await this._recordObservation(instruction, elementsWithSelectors); + return elementsWithSelectors; } } diff --git a/types/stagehand.ts b/types/stagehand.ts index 5c00732c..300df362 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -88,7 +88,7 @@ export interface ObserveOptions { useVision?: boolean; domSettleTimeoutMs?: number; returnAction?: boolean; - visibleElements?: boolean; + useAccessibilityTree?: boolean; } export interface ObserveResult { From de526e5f71088634b0a8463c05efe32f73d440f6 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 27 Jan 2025 15:01:58 -0800 Subject: [PATCH 06/23] github eval --- evals/tasks/observe_github.ts | 70 +++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 evals/tasks/observe_github.ts diff --git a/evals/tasks/observe_github.ts b/evals/tasks/observe_github.ts new file mode 100644 index 00000000..4ec4b0aa --- /dev/null +++ b/evals/tasks/observe_github.ts @@ -0,0 +1,70 @@ +import { initStagehand } from "@/evals/initStagehand"; +import { EvalFunction } from "@/types/evals"; + +export const observe_github: EvalFunction = 
async ({ + modelName, + logger, + useAccessibilityTree, +}) => { + const { stagehand, initResponse } = await initStagehand({ + modelName, + logger, + }); + + const { debugUrl, sessionUrl } = initResponse; + + await stagehand.page.goto("https://github.com/browserbase/stagehand/tree/main/lib"); + + const observations = await stagehand.page.observe({instruction: "find the scrollable element that repos file tree"}); + console.log("observations", JSON.stringify(observations, null, 2)); + + if (observations.length === 0) { + await stagehand.close(); + return { + _success: false, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + const expectedLocator = `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`; + + const expectedResult = await stagehand.page + .locator(expectedLocator) + .first() + .innerText(); + + let foundMatch = false; + for (const observation of observations) { + try { + const observationResult = await stagehand.page + .locator(observation.selector) + .first() + .innerText(); + + if (observationResult === expectedResult) { + foundMatch = true; + break; + } + } catch (error) { + console.warn( + `Failed to check observation with selector ${observation.selector}:`, + error.message, + ); + continue; + } + } + + await stagehand.close(); + + return { + _success: foundMatch, + expected: expectedResult, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; +}; From a8e44260a12fe3f8ff900f4728c2076f5424c78f Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 27 Jan 2025 15:05:06 -0800 Subject: [PATCH 07/23] add back in a11y param (whoops) --- evals/tasks/observe_github.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/evals/tasks/observe_github.ts b/evals/tasks/observe_github.ts index 4ec4b0aa..3880a23f 100644 --- a/evals/tasks/observe_github.ts +++ b/evals/tasks/observe_github.ts @@ -13,10 +13,14 @@ export const observe_github: EvalFunction = async ({ const { 
debugUrl, sessionUrl } = initResponse; - await stagehand.page.goto("https://github.com/browserbase/stagehand/tree/main/lib"); + await stagehand.page.goto( + "https://github.com/browserbase/stagehand/tree/main/lib", + ); - const observations = await stagehand.page.observe({instruction: "find the scrollable element that repos file tree"}); - console.log("observations", JSON.stringify(observations, null, 2)); + const observations = await stagehand.page.observe({ + instruction: "find the scrollable element that repos file tree", + useAccessibilityTree, + }); if (observations.length === 0) { await stagehand.close(); From e671aba96099a16e4d138e434b365a9396382194 Mon Sep 17 00:00:00 2001 From: Miguel Date: Mon, 27 Jan 2025 15:06:12 -0800 Subject: [PATCH 08/23] google search observe eval (returnAction) --- evals/tasks/observe_simple_google_search.ts | 70 +++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 evals/tasks/observe_simple_google_search.ts diff --git a/evals/tasks/observe_simple_google_search.ts b/evals/tasks/observe_simple_google_search.ts new file mode 100644 index 00000000..1fcd1a7b --- /dev/null +++ b/evals/tasks/observe_simple_google_search.ts @@ -0,0 +1,70 @@ +import { EvalFunction } from "@/types/evals"; +import { initStagehand } from "@/evals/initStagehand"; +import { performPlaywrightMethod } from "@/lib/a11y/utils"; + +export const observe_simple_google_search: EvalFunction = async ({ + modelName, + logger, +}) => { + const { stagehand, initResponse } = await initStagehand({ + modelName, + logger, + }); + + const { debugUrl, sessionUrl } = initResponse; + + await stagehand.page.goto("https://www.google.com"); + + // await stagehand.page.act({ + // action: 'Search for "OpenAI"', + // }); + const observation1 = await stagehand.page.observe({ + instruction: "Find the search bar and enter 'OpenAI'", + useAccessibilityTree: true, + returnAction: true, + }); + console.log(observation1); + + if (observation1.length > 0) { + const action1 = 
observation1[0]; + await performPlaywrightMethod( + stagehand.stagehandPage, + stagehand.logger, + action1.method, + action1.arguments, + action1.selector.replace("xpath=", ""), + ); + } + await stagehand.page.waitForTimeout(5000); + const observation2 = await stagehand.page.observe({ + instruction: "Click the search button in the suggestions dropdown", + useAccessibilityTree: true, + returnAction: true, + }); + console.log(observation2); + + if (observation2.length > 0) { + const action2 = observation2[0]; + await performPlaywrightMethod( + stagehand.stagehandPage, + stagehand.logger, + action2.method, + action2.arguments, + action2.selector.replace("xpath=", ""), + ); + } + await stagehand.page.waitForTimeout(5000); + + const expectedUrl = "https://www.google.com/search?q=OpenAI"; + const currentUrl = stagehand.page.url(); + + await stagehand.close(); + + return { + _success: currentUrl.startsWith(expectedUrl), + currentUrl, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; +}; From dd35199aca1822220bf210e3dc8a11f870775f21 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 27 Jan 2025 15:07:42 -0800 Subject: [PATCH 09/23] fix my terrible grammar in the instruction --- evals/tasks/observe_github.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evals/tasks/observe_github.ts b/evals/tasks/observe_github.ts index 3880a23f..200fd939 100644 --- a/evals/tasks/observe_github.ts +++ b/evals/tasks/observe_github.ts @@ -18,7 +18,7 @@ export const observe_github: EvalFunction = async ({ ); const observations = await stagehand.page.observe({ - instruction: "find the scrollable element that repos file tree", + instruction: "find the scrollable element that holds the repos file tree", useAccessibilityTree, }); From f4026190557e2498ba292f71c0ab496e4856087a Mon Sep 17 00:00:00 2001 From: Miguel Date: Mon, 27 Jan 2025 15:13:48 -0800 Subject: [PATCH 10/23] amazon actionable eval --- evals/tasks/observe_amazon_add_to_cart.ts | 75 
+++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 evals/tasks/observe_amazon_add_to_cart.ts diff --git a/evals/tasks/observe_amazon_add_to_cart.ts b/evals/tasks/observe_amazon_add_to_cart.ts new file mode 100644 index 00000000..7c96315b --- /dev/null +++ b/evals/tasks/observe_amazon_add_to_cart.ts @@ -0,0 +1,75 @@ +import { EvalFunction } from "@/types/evals"; +import { initStagehand } from "@/evals/initStagehand"; +import { performPlaywrightMethod } from "@/lib/a11y/utils"; + +export const observe_amazon_add_to_cart: EvalFunction = async ({ + modelName, + logger, +}) => { + const { stagehand, initResponse } = await initStagehand({ + modelName, + logger, + }); + + const { debugUrl, sessionUrl } = initResponse; + + await stagehand.page.goto( + "https://www.amazon.com/Laptop-MacBook-Surface-Water-Resistant-Accessories/dp/B0D5M4H5CD", + ); + + await stagehand.page.waitForTimeout(5000); + + const observations1 = await stagehand.page.observe({ + instruction: "Find and click the 'Add to Cart' button", + useAccessibilityTree: true, + returnAction: true, + }); + + console.log(observations1); + + // Example of using performPlaywrightMethod if you have the xpath + if (observations1.length > 0) { + const action1 = observations1[0]; + await performPlaywrightMethod( + stagehand.stagehandPage, + stagehand.logger, + action1.method, + action1.arguments, + action1.selector.replace("xpath=", ""), + ); + } + + await stagehand.page.waitForTimeout(2000); + + const observations2 = await stagehand.page.observe({ + instruction: "Find and click the 'Proceed to checkout' button", + useAccessibilityTree: true, + returnAction: true, + }); + + // Example of using performPlaywrightMethod if you have the xpath + if (observations2.length > 0) { + const action2 = observations2[0]; + await performPlaywrightMethod( + stagehand.stagehandPage, + stagehand.logger, + action2.method, + action2.arguments, + action2.selector.replace("xpath=", ""), + ); + } + await 
stagehand.page.waitForTimeout(2000); + + const currentUrl = stagehand.page.url(); + const expectedUrlPrefix = "https://www.amazon.com/ap/signin"; + + await stagehand.close(); + + return { + _success: currentUrl.startsWith(expectedUrlPrefix), + currentUrl, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; +}; From ddfcc1e2ab5681c7533da8ab526fd9af979cf00a Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 27 Jan 2025 15:24:34 -0800 Subject: [PATCH 11/23] add gh eval to config --- evals/evals.config.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/evals/evals.config.json b/evals/evals.config.json index 6906cb06..f66ee5c5 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -215,6 +215,10 @@ { "name": "extract_zillow", "categories": ["text_extract"] + }, + { + "name": "observe_github", + "categories": ["observe"] } ] } From 8dbe06ff6d9c56db485ccc540f4ae3f4f2d97e2d Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 27 Jan 2025 15:49:27 -0800 Subject: [PATCH 12/23] vtj eval --- evals/evals.config.json | 4 ++ evals/tasks/observe_vantechjournal.ts | 74 +++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 evals/tasks/observe_vantechjournal.ts diff --git a/evals/evals.config.json b/evals/evals.config.json index f66ee5c5..f7c693d5 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -219,6 +219,10 @@ { "name": "observe_github", "categories": ["observe"] + }, + { + "name": "observe_vantechjournal", + "categories": ["observe"] } ] } diff --git a/evals/tasks/observe_vantechjournal.ts b/evals/tasks/observe_vantechjournal.ts new file mode 100644 index 00000000..8169dd1a --- /dev/null +++ b/evals/tasks/observe_vantechjournal.ts @@ -0,0 +1,74 @@ +import { initStagehand } from "@/evals/initStagehand"; +import { EvalFunction } from "@/types/evals"; + +export const observe_vantechjournal: EvalFunction = async ({ + modelName, + logger, + useAccessibilityTree, +}) => { + const { stagehand, 
initResponse } = await initStagehand({ + modelName, + logger, + }); + + const { debugUrl, sessionUrl } = initResponse; + + await stagehand.page.goto( + "https://vantechjournal.com/archive?page=8", + ); + + const observations = await stagehand.page.observe({ + instruction: "find the button that takes us to the 11th page", + useAccessibilityTree, + }); + + if (observations.length === 0) { + await stagehand.close(); + return { + _success: false, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + const expectedLocator = `a.rounded-lg:nth-child(8)`; + + const expectedResult = await stagehand.page + .locator(expectedLocator) + .first() + .innerText(); + + let foundMatch = false; + for (const observation of observations) { + try { + const observationResult = await stagehand.page + .locator(observation.selector) + .first() + .innerText(); + + if (observationResult === expectedResult) { + foundMatch = true; + break; + } + } catch (error) { + console.warn( + `Failed to check observation with selector ${observation.selector}:`, + error.message, + ); + continue; + } + } + + await stagehand.close(); + + return { + _success: foundMatch, + expected: expectedResult, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; +}; From c66449b1177e0cb16f32b66a5668a1ce7cc36d3c Mon Sep 17 00:00:00 2001 From: Miguel Date: Mon, 27 Jan 2025 16:15:17 -0800 Subject: [PATCH 13/23] added evals to config.json --- evals/evals.config.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/evals/evals.config.json b/evals/evals.config.json index f7c693d5..c02ff7e1 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -223,6 +223,14 @@ { "name": "observe_vantechjournal", "categories": ["observe"] + }, + { + "name": "observe_amazon_add_to_cart", + "categories": ["observe"] + }, + { + "name": "observe_simple_google_search", + "categories": ["observe"] } ] } From f6039d2334c50f3318e103fda718cd8ba7cdcb76 Mon Sep 17 00:00:00 2001 From: 
Miguel Date: Mon, 27 Jan 2025 16:39:24 -0800 Subject: [PATCH 14/23] fixing lint/build issues --- evals/tasks/observe_amazon_add_to_cart.ts | 4 +-- evals/tasks/observe_simple_google_search.ts | 4 +-- evals/tasks/observe_vantechjournal.ts | 4 +-- lib/a11y/utils.ts | 36 ++++++++++----------- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/evals/tasks/observe_amazon_add_to_cart.ts b/evals/tasks/observe_amazon_add_to_cart.ts index 7c96315b..18595cdf 100644 --- a/evals/tasks/observe_amazon_add_to_cart.ts +++ b/evals/tasks/observe_amazon_add_to_cart.ts @@ -31,7 +31,7 @@ export const observe_amazon_add_to_cart: EvalFunction = async ({ if (observations1.length > 0) { const action1 = observations1[0]; await performPlaywrightMethod( - stagehand.stagehandPage, + stagehand.page, stagehand.logger, action1.method, action1.arguments, @@ -51,7 +51,7 @@ export const observe_amazon_add_to_cart: EvalFunction = async ({ if (observations2.length > 0) { const action2 = observations2[0]; await performPlaywrightMethod( - stagehand.stagehandPage, + stagehand.page, stagehand.logger, action2.method, action2.arguments, diff --git a/evals/tasks/observe_simple_google_search.ts b/evals/tasks/observe_simple_google_search.ts index 1fcd1a7b..48e546dc 100644 --- a/evals/tasks/observe_simple_google_search.ts +++ b/evals/tasks/observe_simple_google_search.ts @@ -28,7 +28,7 @@ export const observe_simple_google_search: EvalFunction = async ({ if (observation1.length > 0) { const action1 = observation1[0]; await performPlaywrightMethod( - stagehand.stagehandPage, + stagehand.page, stagehand.logger, action1.method, action1.arguments, @@ -46,7 +46,7 @@ export const observe_simple_google_search: EvalFunction = async ({ if (observation2.length > 0) { const action2 = observation2[0]; await performPlaywrightMethod( - stagehand.stagehandPage, + stagehand.page, stagehand.logger, action2.method, action2.arguments, diff --git a/evals/tasks/observe_vantechjournal.ts 
b/evals/tasks/observe_vantechjournal.ts index 8169dd1a..3bbedba1 100644 --- a/evals/tasks/observe_vantechjournal.ts +++ b/evals/tasks/observe_vantechjournal.ts @@ -13,9 +13,7 @@ export const observe_vantechjournal: EvalFunction = async ({ const { debugUrl, sessionUrl } = initResponse; - await stagehand.page.goto( - "https://vantechjournal.com/archive?page=8", - ); + await stagehand.page.goto("https://vantechjournal.com/archive?page=8"); const observations = await stagehand.page.observe({ instruction: "find the button that takes us to the 11th page", diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index ca5a7479..f6b2bc00 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -224,15 +224,15 @@ export async function getXPathByResolvedObjectId( } export async function performPlaywrightMethod( - stagehandPage: StagehandPage, + stagehandPage: Page, logger: (logLine: LogLine) => void, method: string, args: unknown[], xpath: string, - domSettleTimeoutMs?: number, + // domSettleTimeoutMs?: number, ) { - const locator = stagehandPage.page.locator(`xpath=${xpath}`).first(); - const initialUrl = stagehandPage.page.url(); + const locator = stagehandPage.locator(`xpath=${xpath}`).first(); + const initialUrl = stagehandPage.url(); logger({ category: "action", @@ -317,7 +317,7 @@ export async function performPlaywrightMethod( await locator.click(); const text = args[0]?.toString(); for (const char of text) { - await stagehandPage.page.keyboard.type(char, { + await stagehandPage.keyboard.type(char, { delay: Math.random() * 50 + 25, }); } @@ -347,7 +347,7 @@ export async function performPlaywrightMethod( } else if (method === "press") { try { const key = args[0]?.toString(); - await stagehandPage.page.keyboard.press(key); + await stagehandPage.keyboard.press(key); } catch (e) { logger({ category: "action", @@ -379,7 +379,7 @@ export async function performPlaywrightMethod( level: 2, auxiliary: { url: { - value: stagehandPage.page.url(), + value: stagehandPage.url(), type: 
"string", }, }, @@ -441,10 +441,10 @@ export async function performPlaywrightMethod( // NAVIDNOTE: Should this happen before we wait for locator[method]? const newOpenedTab = await Promise.race([ new Promise((resolve) => { - // TODO: This is a hack to get the new page - // We should find a better way to do this - stagehandPage.context.once("page", (page) => resolve(page)); - setTimeout(() => resolve(null), 1_500); + Promise.resolve(stagehandPage.context()).then((context) => { + context.once("page", (page: Page) => resolve(page)); + setTimeout(() => resolve(null), 1_500); + }); }), ]); @@ -473,13 +473,13 @@ export async function performPlaywrightMethod( }, }); await newOpenedTab.close(); - await stagehandPage.page.goto(newOpenedTab.url()); - await stagehandPage.page.waitForLoadState("domcontentloaded"); - await stagehandPage._waitForSettledDom(domSettleTimeoutMs); + await stagehandPage.goto(newOpenedTab.url()); + await stagehandPage.waitForLoadState("domcontentloaded"); + // await stagehandPage._waitForSettledDom(domSettleTimeoutMs); } await Promise.race([ - stagehandPage.page.waitForLoadState("networkidle"), + stagehandPage.waitForLoadState("networkidle"), new Promise((resolve) => setTimeout(resolve, 5_000)), ]).catch((e) => { logger({ @@ -505,14 +505,14 @@ export async function performPlaywrightMethod( level: 1, }); - if (stagehandPage.page.url() !== initialUrl) { + if (stagehandPage.url() !== initialUrl) { logger({ category: "action", message: "new page detected with URL", level: 1, auxiliary: { url: { - value: stagehandPage.page.url(), + value: stagehandPage.url(), type: "string", }, }, @@ -537,5 +537,5 @@ export async function performPlaywrightMethod( ); } - await stagehandPage._waitForSettledDom(domSettleTimeoutMs); + // await stagehandPage._waitForSettledDom(domSettleTimeoutMs); } From b848d8f0a4f28a6b1095cfb75d72fc055de025b1 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 27 Jan 2025 18:01:38 -0800 Subject: [PATCH 15/23] compare element handles --- 
evals/tasks/observe_github.ts | 25 +++++++++++++++++-------- evals/tasks/observe_vantechjournal.ts | 25 +++++++++++++++++-------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/evals/tasks/observe_github.ts b/evals/tasks/observe_github.ts index 200fd939..3b2f6f07 100644 --- a/evals/tasks/observe_github.ts +++ b/evals/tasks/observe_github.ts @@ -35,20 +35,29 @@ export const observe_github: EvalFunction = async ({ const expectedLocator = `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`; - const expectedResult = await stagehand.page - .locator(expectedLocator) - .first() - .innerText(); + const expectedResult = await stagehand.page.locator(expectedLocator); let foundMatch = false; + for (const observation of observations) { try { - const observationResult = await stagehand.page + const observationLocator = stagehand.page .locator(observation.selector) - .first() - .innerText(); + .first(); + const observationHandle = await observationLocator.elementHandle(); + const expectedHandle = await expectedResult.elementHandle(); + + if (!observationHandle || !expectedHandle) { + // Couldn’t get handles, skip + continue; + } + + const isSameNode = await observationHandle.evaluate( + (node, otherNode) => node === otherNode, + expectedHandle, + ); - if (observationResult === expectedResult) { + if (isSameNode) { foundMatch = true; break; } diff --git a/evals/tasks/observe_vantechjournal.ts b/evals/tasks/observe_vantechjournal.ts index 3bbedba1..d1204b2d 100644 --- a/evals/tasks/observe_vantechjournal.ts +++ b/evals/tasks/observe_vantechjournal.ts @@ -33,20 +33,29 @@ export const observe_vantechjournal: EvalFunction = async ({ const expectedLocator = `a.rounded-lg:nth-child(8)`; - const expectedResult = await stagehand.page - .locator(expectedLocator) - .first() - .innerText(); + const expectedResult = await stagehand.page.locator(expectedLocator); let foundMatch = false; + for (const observation of observations) { try { - const 
observationResult = await stagehand.page + const observationLocator = stagehand.page .locator(observation.selector) - .first() - .innerText(); + .first(); + const observationHandle = await observationLocator.elementHandle(); + const expectedHandle = await expectedResult.elementHandle(); + + if (!observationHandle || !expectedHandle) { + // Couldn’t get handles, skip + continue; + } + + const isSameNode = await observationHandle.evaluate( + (node, otherNode) => node === otherNode, + expectedHandle, + ); - if (observationResult === expectedResult) { + if (isSameNode) { foundMatch = true; break; } From 4f36762a71045771f9f62254fc96afd083424a80 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 27 Jan 2025 18:02:06 -0800 Subject: [PATCH 16/23] yc eval --- evals/evals.config.json | 4 ++ evals/tasks/observe_yc_startup.ts | 85 +++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 evals/tasks/observe_yc_startup.ts diff --git a/evals/evals.config.json b/evals/evals.config.json index c02ff7e1..d87e69ba 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -231,6 +231,10 @@ { "name": "observe_simple_google_search", "categories": ["observe"] + }, + { + "name": "observe_yc_startup", + "categories": ["observe"] } ] } diff --git a/evals/tasks/observe_yc_startup.ts b/evals/tasks/observe_yc_startup.ts new file mode 100644 index 00000000..5b9cbd6c --- /dev/null +++ b/evals/tasks/observe_yc_startup.ts @@ -0,0 +1,85 @@ +import { initStagehand } from "@/evals/initStagehand"; +import { EvalFunction } from "@/types/evals"; + +export const observe_yc_startup: EvalFunction = async ({ + modelName, + logger, + useAccessibilityTree, +}) => { + const { stagehand, initResponse } = await initStagehand({ + modelName, + logger, + }); + + const { debugUrl, sessionUrl } = initResponse; + + await stagehand.page.goto("https://www.ycombinator.com/companies"); + await stagehand.page.waitForLoadState("networkidle"); + + const observations = await 
stagehand.page.observe({ + instruction: + "Find the container element that holds links to each of the startup companies. The companies each have a name, a description, and a link to their website.", + useAccessibilityTree, + }); + + console.log("observations", JSON.stringify(observations, null, 2)); + + if (observations.length === 0) { + await stagehand.close(); + return { + _success: false, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + const expectedLocator = "div._section_1pgsr_163._results_1pgsr_343"; + + const expectedResult = await stagehand.page.locator(expectedLocator); + + let foundMatch = false; + + for (const observation of observations) { + try { + const observationLocator = stagehand.page + .locator(observation.selector) + .first(); + const observationHandle = await observationLocator.elementHandle(); + const expectedHandle = await expectedResult.elementHandle(); + + if (!observationHandle || !expectedHandle) { + // Couldn’t get handles, skip + continue; + } + + const isSameNode = await observationHandle.evaluate( + (node, otherNode) => node === otherNode, + expectedHandle, + ); + + if (isSameNode) { + foundMatch = true; + break; + } + } catch (error) { + console.warn( + `Failed to check observation with selector ${observation.selector}:`, + error.message, + ); + continue; + } + } + + await stagehand.close(); + + return { + _success: foundMatch, + expected: expectedResult, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; +}; From aee3681a14adf2b2ade420ffc11df4c1395acb4e Mon Sep 17 00:00:00 2001 From: Miguel Date: Mon, 27 Jan 2025 18:26:54 -0800 Subject: [PATCH 17/23] changed useAccessibilityTree to onlyVisible --- evals/tasks/ionwave_observe.ts | 8 ++---- evals/tasks/observe_amazon_add_to_cart.ts | 4 +-- evals/tasks/observe_simple_google_search.ts | 4 +-- evals/tasks/panamcs.ts | 8 ++---- evals/tasks/shopify_homepage.ts | 8 ++---- evals/tasks/vanta.ts | 8 ++---- evals/tasks/vanta_h.ts | 8 ++---- 
lib/StagehandPage.ts | 8 +++--- lib/handlers/observeHandler.ts | 28 ++++++++++----------- types/stagehand.ts | 2 +- 10 files changed, 33 insertions(+), 53 deletions(-) diff --git a/evals/tasks/ionwave_observe.ts b/evals/tasks/ionwave_observe.ts index 15cc2d8d..b0433342 100644 --- a/evals/tasks/ionwave_observe.ts +++ b/evals/tasks/ionwave_observe.ts @@ -1,11 +1,7 @@ import { initStagehand } from "@/evals/initStagehand"; import { EvalFunction } from "@/types/evals"; -export const ionwave_observe: EvalFunction = async ({ - modelName, - logger, - useAccessibilityTree, -}) => { +export const ionwave_observe: EvalFunction = async ({ modelName, logger }) => { const { stagehand, initResponse } = await initStagehand({ modelName, logger, @@ -15,7 +11,7 @@ export const ionwave_observe: EvalFunction = async ({ await stagehand.page.goto("https://elpasotexas.ionwave.net/Login.aspx"); - const observations = await stagehand.page.observe({ useAccessibilityTree }); + const observations = await stagehand.page.observe({ onlyVisible: true }); if (observations.length === 0) { await stagehand.close(); diff --git a/evals/tasks/observe_amazon_add_to_cart.ts b/evals/tasks/observe_amazon_add_to_cart.ts index 18595cdf..8be49da1 100644 --- a/evals/tasks/observe_amazon_add_to_cart.ts +++ b/evals/tasks/observe_amazon_add_to_cart.ts @@ -21,7 +21,7 @@ export const observe_amazon_add_to_cart: EvalFunction = async ({ const observations1 = await stagehand.page.observe({ instruction: "Find and click the 'Add to Cart' button", - useAccessibilityTree: true, + onlyVisible: false, returnAction: true, }); @@ -43,7 +43,7 @@ export const observe_amazon_add_to_cart: EvalFunction = async ({ const observations2 = await stagehand.page.observe({ instruction: "Find and click the 'Proceed to checkout' button", - useAccessibilityTree: true, + onlyVisible: false, returnAction: true, }); diff --git a/evals/tasks/observe_simple_google_search.ts b/evals/tasks/observe_simple_google_search.ts index 48e546dc..16eb38ff 
100644 --- a/evals/tasks/observe_simple_google_search.ts +++ b/evals/tasks/observe_simple_google_search.ts @@ -20,7 +20,7 @@ export const observe_simple_google_search: EvalFunction = async ({ // }); const observation1 = await stagehand.page.observe({ instruction: "Find the search bar and enter 'OpenAI'", - useAccessibilityTree: true, + onlyVisible: false, returnAction: true, }); console.log(observation1); @@ -38,7 +38,7 @@ export const observe_simple_google_search: EvalFunction = async ({ await stagehand.page.waitForTimeout(5000); const observation2 = await stagehand.page.observe({ instruction: "Click the search button in the suggestions dropdown", - useAccessibilityTree: true, + onlyVisible: false, returnAction: true, }); console.log(observation2); diff --git a/evals/tasks/panamcs.ts b/evals/tasks/panamcs.ts index 330af12b..afc98c8f 100644 --- a/evals/tasks/panamcs.ts +++ b/evals/tasks/panamcs.ts @@ -1,11 +1,7 @@ import { EvalFunction } from "@/types/evals"; import { initStagehand } from "@/evals/initStagehand"; -export const panamcs: EvalFunction = async ({ - modelName, - logger, - useAccessibilityTree, -}) => { +export const panamcs: EvalFunction = async ({ modelName, logger }) => { const { stagehand, initResponse } = await initStagehand({ modelName, logger, @@ -15,7 +11,7 @@ export const panamcs: EvalFunction = async ({ await stagehand.page.goto("https://panamcs.org/about/staff/"); - const observations = await stagehand.page.observe({ useAccessibilityTree }); + const observations = await stagehand.page.observe({ onlyVisible: true }); if (observations.length === 0) { await stagehand.close(); diff --git a/evals/tasks/shopify_homepage.ts b/evals/tasks/shopify_homepage.ts index 271f4c56..e846422e 100644 --- a/evals/tasks/shopify_homepage.ts +++ b/evals/tasks/shopify_homepage.ts @@ -1,11 +1,7 @@ import { EvalFunction } from "@/types/evals"; import { initStagehand } from "@/evals/initStagehand"; -export const shopify_homepage: EvalFunction = async ({ - modelName, - 
logger, - useAccessibilityTree, -}) => { +export const shopify_homepage: EvalFunction = async ({ modelName, logger }) => { const { stagehand, initResponse } = await initStagehand({ modelName, logger, @@ -15,7 +11,7 @@ export const shopify_homepage: EvalFunction = async ({ await stagehand.page.goto("https://www.shopify.com/"); - const observations = await stagehand.page.observe({ useAccessibilityTree }); + const observations = await stagehand.page.observe({ onlyVisible: true }); if (observations.length === 0) { await stagehand.close(); diff --git a/evals/tasks/vanta.ts b/evals/tasks/vanta.ts index 73a7906c..2959389c 100644 --- a/evals/tasks/vanta.ts +++ b/evals/tasks/vanta.ts @@ -1,11 +1,7 @@ import { EvalFunction } from "@/types/evals"; import { initStagehand } from "@/evals/initStagehand"; -export const vanta: EvalFunction = async ({ - modelName, - logger, - useAccessibilityTree, -}) => { +export const vanta: EvalFunction = async ({ modelName, logger }) => { const { stagehand, initResponse } = await initStagehand({ modelName, logger, @@ -16,7 +12,7 @@ export const vanta: EvalFunction = async ({ await stagehand.page.goto("https://www.vanta.com/"); await stagehand.page.act({ action: "close the cookies popup" }); - const observations = await stagehand.page.observe({ useAccessibilityTree }); + const observations = await stagehand.page.observe({ onlyVisible: true }); if (observations.length === 0) { await stagehand.close(); diff --git a/evals/tasks/vanta_h.ts b/evals/tasks/vanta_h.ts index 606659c4..eca69a0f 100644 --- a/evals/tasks/vanta_h.ts +++ b/evals/tasks/vanta_h.ts @@ -1,11 +1,7 @@ import { EvalFunction } from "@/types/evals"; import { initStagehand } from "@/evals/initStagehand"; -export const vanta_h: EvalFunction = async ({ - modelName, - logger, - useAccessibilityTree, -}) => { +export const vanta_h: EvalFunction = async ({ modelName, logger }) => { const { stagehand, initResponse } = await initStagehand({ modelName, logger, @@ -17,7 +13,7 @@ export const 
vanta_h: EvalFunction = async ({ const observations = await stagehand.page.observe({ instruction: "find the buy now button if it is available", - useAccessibilityTree, + onlyVisible: true, }); await stagehand.close(); diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 0f183ebd..59c2b735 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -477,7 +477,7 @@ export class StagehandPage { useVision, // still destructure but will not pass it on domSettleTimeoutMs, returnAction = true, - useAccessibilityTree = true, + onlyVisible = false, } = options; if (typeof useVision !== "undefined") { @@ -511,8 +511,8 @@ export class StagehandPage { value: llmClient.modelName, type: "string", }, - useAccessibilityTree: { - value: useAccessibilityTree ? "true" : "false", + onlyVisible: { + value: onlyVisible ? "true" : "false", type: "boolean", }, }, @@ -525,7 +525,7 @@ export class StagehandPage { requestId, domSettleTimeoutMs, returnAction, - useAccessibilityTree, + onlyVisible, }) .catch((e) => { this.stagehand.log({ diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 5ee93ec8..9a16b258 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -54,14 +54,14 @@ export class StagehandObserveHandler { llmClient, requestId, returnAction, - useAccessibilityTree, + onlyVisible, }: { instruction: string; llmClient: LLMClient; requestId: string; domSettleTimeoutMs?: number; returnAction?: boolean; - useAccessibilityTree?: boolean; + onlyVisible?: boolean; }) { if (!instruction) { instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. 
Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`; @@ -80,7 +80,7 @@ export class StagehandObserveHandler { let selectorMap: Record = {}; let outputString: string; - + const useAccessibilityTree = !onlyVisible; if (useAccessibilityTree) { const tree = await getAccessibilityTree(this.stagehandPage, this.logger); this.logger({ @@ -149,17 +149,17 @@ export class StagehandObserveHandler { ); await this.stagehandPage.cleanupDomDebug(); - // this.logger({ - // category: "observation", - // message: "found elements", - // level: 1, - // auxiliary: { - // elements: { - // value: JSON.stringify(elementsWithSelectors), - // type: "object", - // }, - // }, - // }); + this.logger({ + category: "observation", + message: "found elements", + level: 1, + auxiliary: { + elements: { + value: JSON.stringify(elementsWithSelectors), + type: "object", + }, + }, + }); await this._recordObservation(instruction, elementsWithSelectors); return elementsWithSelectors; diff --git a/types/stagehand.ts b/types/stagehand.ts index 30e8ec3f..a088638e 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -90,7 +90,7 @@ export interface ObserveOptions { useVision?: boolean; domSettleTimeoutMs?: number; returnAction?: boolean; - useAccessibilityTree?: boolean; + onlyVisible?: boolean; } export interface ObserveResult { From a83705ecaa1019fea95e360e6647171a61de94f0 Mon Sep 17 00:00:00 2001 From: Miguel Date: Mon, 27 Jan 2025 18:31:01 -0800 Subject: [PATCH 18/23] removing useAccessibilityTree from evals --- evals/tasks/observe_github.ts | 7 +------ evals/tasks/observe_vantechjournal.ts | 2 -- evals/tasks/observe_yc_startup.ts | 2 -- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/evals/tasks/observe_github.ts b/evals/tasks/observe_github.ts index 3b2f6f07..19cb5e0d 100644 --- a/evals/tasks/observe_github.ts +++ b/evals/tasks/observe_github.ts @@ -1,11 +1,7 @@ import { initStagehand } from "@/evals/initStagehand"; import { 
EvalFunction } from "@/types/evals"; -export const observe_github: EvalFunction = async ({ - modelName, - logger, - useAccessibilityTree, -}) => { +export const observe_github: EvalFunction = async ({ modelName, logger }) => { const { stagehand, initResponse } = await initStagehand({ modelName, logger, @@ -19,7 +15,6 @@ export const observe_github: EvalFunction = async ({ const observations = await stagehand.page.observe({ instruction: "find the scrollable element that holds the repos file tree", - useAccessibilityTree, }); if (observations.length === 0) { diff --git a/evals/tasks/observe_vantechjournal.ts b/evals/tasks/observe_vantechjournal.ts index d1204b2d..c097d3de 100644 --- a/evals/tasks/observe_vantechjournal.ts +++ b/evals/tasks/observe_vantechjournal.ts @@ -4,7 +4,6 @@ import { EvalFunction } from "@/types/evals"; export const observe_vantechjournal: EvalFunction = async ({ modelName, logger, - useAccessibilityTree, }) => { const { stagehand, initResponse } = await initStagehand({ modelName, @@ -17,7 +16,6 @@ export const observe_vantechjournal: EvalFunction = async ({ const observations = await stagehand.page.observe({ instruction: "find the button that takes us to the 11th page", - useAccessibilityTree, }); if (observations.length === 0) { diff --git a/evals/tasks/observe_yc_startup.ts b/evals/tasks/observe_yc_startup.ts index 5b9cbd6c..63c6ef92 100644 --- a/evals/tasks/observe_yc_startup.ts +++ b/evals/tasks/observe_yc_startup.ts @@ -4,7 +4,6 @@ import { EvalFunction } from "@/types/evals"; export const observe_yc_startup: EvalFunction = async ({ modelName, logger, - useAccessibilityTree, }) => { const { stagehand, initResponse } = await initStagehand({ modelName, @@ -19,7 +18,6 @@ export const observe_yc_startup: EvalFunction = async ({ const observations = await stagehand.page.observe({ instruction: "Find the container element that holds links to each of the startup companies. 
The companies each have a name, a description, and a link to their website.", - useAccessibilityTree, }); console.log("observations", JSON.stringify(observations, null, 2)); From 299eebf76fda15b04c7c93f2b30e23667567bfe9 Mon Sep 17 00:00:00 2001 From: Miguel Date: Mon, 27 Jan 2025 20:48:07 -0800 Subject: [PATCH 19/23] mostly removing comments --- lib/a11y/utils.ts | 1 - lib/handlers/observeHandler.ts | 12 +----------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index f6b2bc00..9adc8160 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -438,7 +438,6 @@ export async function performPlaywrightMethod( }, }); - // NAVIDNOTE: Should this happen before we wait for locator[method]? const newOpenedTab = await Promise.race([ new Promise((resolve) => { Promise.resolve(stagehandPage.context()).then((context) => { diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 9a16b258..7444147c 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -112,11 +112,6 @@ export class StagehandObserveHandler { const { elementId, ...rest } = element; if (useAccessibilityTree) { - // const index = Object.entries(backendNodeIdMap).find( - // ([, value]) => value === elementId, - // )?.[0]; - // if (!index || !selectorMap[index]?.[0]) { - // Generate xpath for the given element if not found in selectorMap const { object } = await this.stagehandPage.sendCDP<{ object: { objectId: string }; @@ -130,14 +125,9 @@ export class StagehandObserveHandler { return { ...rest, selector: `xpath=${xpath}`, + // Provisioning or future use if we want to use direct CDP // backendNodeId: elementId, }; - // } - // return { - // ...rest, - // selector: `xpath=${selectorMap[index][0]}`, - // // backendNodeId: elementId, - // }; } return { From 3e6ff436442e2bcd29b0da2a3c10892facfccca4 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Tue, 28 Jan 2025 09:25:02 -0800 Subject: [PATCH 20/23] accept 
multiple selectors --- evals/tasks/observe_github.ts | 40 +++++++++++++++++++---------- evals/tasks/observe_yc_startup.ts | 42 ++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 28 deletions(-) diff --git a/evals/tasks/observe_github.ts b/evals/tasks/observe_github.ts index 19cb5e0d..1effed68 100644 --- a/evals/tasks/observe_github.ts +++ b/evals/tasks/observe_github.ts @@ -28,11 +28,22 @@ export const observe_github: EvalFunction = async ({ modelName, logger }) => { }; } - const expectedLocator = `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`; + const possibleLocators = [ + `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`, + `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav`, + ]; - const expectedResult = await stagehand.page.locator(expectedLocator); + const possibleHandles = []; + for (const locatorStr of possibleLocators) { + const locator = stagehand.page.locator(locatorStr); + const handle = await locator.elementHandle(); + if (handle) { + possibleHandles.push({ locatorStr, handle }); + } + } let foundMatch = false; + let matchedLocator: string | null = null; for (const observation of observations) { try { @@ -40,20 +51,23 @@ export const observe_github: EvalFunction = async ({ modelName, logger }) => { .locator(observation.selector) .first(); const observationHandle = await observationLocator.elementHandle(); - const expectedHandle = await expectedResult.elementHandle(); - - if (!observationHandle || !expectedHandle) { - // Couldn’t get handles, skip + if (!observationHandle) { continue; } - const isSameNode = await observationHandle.evaluate( - (node, otherNode) => node === otherNode, - expectedHandle, - ); + for (const { locatorStr, handle: candidateHandle } of possibleHandles) { + const isSameNode = await observationHandle.evaluate( + (node, otherNode) => node === otherNode, + candidateHandle, + ); + if (isSameNode) { + foundMatch = true; + matchedLocator = 
locatorStr; + break; + } + } - if (isSameNode) { - foundMatch = true; + if (foundMatch) { break; } } catch (error) { @@ -69,7 +83,7 @@ export const observe_github: EvalFunction = async ({ modelName, logger }) => { return { _success: foundMatch, - expected: expectedResult, + matchedLocator, observations, debugUrl, sessionUrl, diff --git a/evals/tasks/observe_yc_startup.ts b/evals/tasks/observe_yc_startup.ts index 63c6ef92..913f398f 100644 --- a/evals/tasks/observe_yc_startup.ts +++ b/evals/tasks/observe_yc_startup.ts @@ -20,8 +20,6 @@ export const observe_yc_startup: EvalFunction = async ({ "Find the container element that holds links to each of the startup companies. The companies each have a name, a description, and a link to their website.", }); - console.log("observations", JSON.stringify(observations, null, 2)); - if (observations.length === 0) { await stagehand.close(); return { @@ -33,11 +31,22 @@ export const observe_yc_startup: EvalFunction = async ({ }; } - const expectedLocator = "div._section_1pgsr_163._results_1pgsr_343"; + const possibleLocators = [ + `div._section_1pgsr_163._results_1pgsr_343`, + `div._rightCol_1pgsr_592`, + ]; - const expectedResult = await stagehand.page.locator(expectedLocator); + const possibleHandles = []; + for (const locatorStr of possibleLocators) { + const locator = stagehand.page.locator(locatorStr); + const handle = await locator.elementHandle(); + if (handle) { + possibleHandles.push({ locatorStr, handle }); + } + } let foundMatch = false; + let matchedLocator: string | null = null; for (const observation of observations) { try { @@ -45,20 +54,23 @@ export const observe_yc_startup: EvalFunction = async ({ .locator(observation.selector) .first(); const observationHandle = await observationLocator.elementHandle(); - const expectedHandle = await expectedResult.elementHandle(); - - if (!observationHandle || !expectedHandle) { - // Couldn’t get handles, skip + if (!observationHandle) { continue; } - const isSameNode = await 
observationHandle.evaluate( - (node, otherNode) => node === otherNode, - expectedHandle, - ); + for (const { locatorStr, handle: candidateHandle } of possibleHandles) { + const isSameNode = await observationHandle.evaluate( + (node, otherNode) => node === otherNode, + candidateHandle, + ); + if (isSameNode) { + foundMatch = true; + matchedLocator = locatorStr; + break; + } + } - if (isSameNode) { - foundMatch = true; + if (foundMatch) { break; } } catch (error) { @@ -74,7 +86,7 @@ export const observe_yc_startup: EvalFunction = async ({ return { _success: foundMatch, - expected: expectedResult, + matchedLocator, observations, debugUrl, sessionUrl, From a3261c749caff35586aad69e26bd03b2116fde77 Mon Sep 17 00:00:00 2001 From: Miguel Date: Tue, 28 Jan 2025 10:30:51 -0800 Subject: [PATCH 21/23] added changeset --- .changeset/chilled-jokes-teach.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/chilled-jokes-teach.md diff --git a/.changeset/chilled-jokes-teach.md b/.changeset/chilled-jokes-teach.md new file mode 100644 index 00000000..207ed3c7 --- /dev/null +++ b/.changeset/chilled-jokes-teach.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +Observe got a major upgrade. Now it will return a suggested playwright method with any necessary arguments for the generated candidate elements. It also includes a major speedup when using a11y tree processing for context. 
From 911572943c5f1a472c7c7f835885d23761cc3123 Mon Sep 17 00:00:00 2001 From: Miguel Date: Tue, 28 Jan 2025 14:47:30 -0800 Subject: [PATCH 22/23] return action defaults to false, waitForSettledDom before getAccessibilityTree --- lib/StagehandPage.ts | 2 +- lib/handlers/observeHandler.ts | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 59c2b735..e5239a4e 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -476,7 +476,7 @@ export class StagehandPage { modelClientOptions, useVision, // still destructure but will not pass it on domSettleTimeoutMs, - returnAction = true, + returnAction = false, onlyVisible = false, } = options; diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 7444147c..89d09214 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -82,6 +82,7 @@ export class StagehandObserveHandler { let outputString: string; const useAccessibilityTree = !onlyVisible; if (useAccessibilityTree) { + await this.stagehandPage._waitForSettledDom(); const tree = await getAccessibilityTree(this.stagehandPage, this.logger); this.logger({ category: "observation", From 9776827e7da4d829d97b594431b5c47da2093227 Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 29 Jan 2025 13:53:57 -0800 Subject: [PATCH 23/23] fixes to xpath generation and more evals (observe form fields) --- evals/evals.config.json | 4 ++ evals/tasks/observe_taxes.ts | 76 +++++++++++++++++++++++++++ evals/tasks/observe_vantechjournal.ts | 1 + lib/a11y/utils.ts | 45 ++++++++-------- 4 files changed, 104 insertions(+), 22 deletions(-) create mode 100644 evals/tasks/observe_taxes.ts diff --git a/evals/evals.config.json b/evals/evals.config.json index d87e69ba..9e4b11d1 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -235,6 +235,10 @@ { "name": "observe_yc_startup", "categories": ["observe"] + }, + { + "name": "observe_taxes", + "categories": ["observe"] } ] 
} diff --git a/evals/tasks/observe_taxes.ts b/evals/tasks/observe_taxes.ts new file mode 100644 index 00000000..33a7a85e --- /dev/null +++ b/evals/tasks/observe_taxes.ts @@ -0,0 +1,76 @@ +import { EvalFunction } from "@/types/evals"; +import { initStagehand } from "@/evals/initStagehand"; + +export const observe_taxes: EvalFunction = async ({ modelName, logger }) => { + const { stagehand, initResponse } = await initStagehand({ + modelName, + logger, + }); + + const { debugUrl, sessionUrl } = initResponse; + + await stagehand.page.goto("https://file.1040.com/estimate/"); + + const observations = await stagehand.page.observe({ + instruction: "Find all the form elements under the 'Income' section", + }); + + if (observations.length === 0) { + await stagehand.close(); + return { + _success: false, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } else if (observations.length < 13) { + await stagehand.close(); + return { + _success: false, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + const expectedLocator = `#tpWages`; + + const expectedResult = await stagehand.page + .locator(expectedLocator) + .first() + .innerText(); + + let foundMatch = false; + for (const observation of observations) { + try { + const observationResult = await stagehand.page + .locator(observation.selector) + .first() + .innerText(); + + if (observationResult === expectedResult) { + foundMatch = true; + break; + } + } catch (error) { + console.warn( + `Failed to check observation with selector ${observation.selector}:`, + error.message, + ); + continue; + } + } + + await stagehand.close(); + + return { + _success: foundMatch, + expected: expectedResult, + observations, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; +}; diff --git a/evals/tasks/observe_vantechjournal.ts b/evals/tasks/observe_vantechjournal.ts index c097d3de..4ca7dbbf 100644 --- a/evals/tasks/observe_vantechjournal.ts +++ b/evals/tasks/observe_vantechjournal.ts 
@@ -13,6 +13,7 @@ export const observe_vantechjournal: EvalFunction = async ({ const { debugUrl, sessionUrl } = initResponse; await stagehand.page.goto("https://vantechjournal.com/archive?page=8"); + await stagehand.page.waitForTimeout(1000); const observations = await stagehand.page.observe({ instruction: "find the button that takes us to the 11th page", diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 9adc8160..fcbb9e17 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -177,32 +177,33 @@ export async function getAccessibilityTree( // This function is wrapped into a string and sent as a CDP command // It is not meant to be actually executed here -function getNodePath(node: Element) { - const parts = []; - let current = node; - - while (current && current.parentNode) { - if (current.nodeType === Node.ELEMENT_NODE) { - let tagName = current.tagName.toLowerCase(); - const sameTagSiblings = Array.from(current.parentNode.children).filter( - (child) => child.tagName === current.tagName, - ); - - if (sameTagSiblings.length > 1) { - let index = 1; - for (const sibling of sameTagSiblings) { - if (sibling === current) break; - index++; - } - tagName += "[" + index + "]"; +function getNodePath(el: Element) { + if (!el || el.nodeType !== Node.ELEMENT_NODE) return ""; + const pathSegments = []; + let current = el; + while (current && current.nodeType === Node.ELEMENT_NODE) { + const tagName = current.nodeName.toLowerCase(); + let index = 1; + let sibling = current.previousSibling; + while (sibling) { + if ( + sibling.nodeType === Node.ELEMENT_NODE && + sibling.nodeName.toLowerCase() === tagName + ) { + index++; } - - parts.unshift(tagName); + sibling = sibling.previousSibling; } + const segment = index > 1 ? 
tagName + "[" + index + "]" : tagName; + pathSegments.unshift(segment); current = current.parentNode as Element; + if (!current || !current.parentNode) break; + if (current.nodeName.toLowerCase() === "html") { + pathSegments.unshift("html"); + break; + } } - - return "/" + parts.join("/"); + return "/" + pathSegments.join("/"); } const functionString = getNodePath.toString();