Skip to content

Commit

Permalink
Observe perform candidates (#426)
Browse files Browse the repository at this point in the history
* flagged return action v0.1

* isVisible flag added to observe

* prettier and cleanup of fs functions

* simplifying observe flagging

* two flags: useAccessibilityTree and returnAction. No more processAlDOM on a11y context

* github eval

* add back in a11y param (whoops)

* google search observe eval (returnAction)

* fix my terrible grammar in the instruction

* amazon actionable eval

* add gh eval to config

* vtj eval

* added evals to config.json

* fixing lint/build issues

* compare element handles

* yc eval

* changed useAccessibilityTree to onlyVisible

* removing useAccessibilityTree from evals

* mostly removing comments

* accept multiple selectors

* added changeset

* return action defaults to false, waitForSettledDom before getAccessibilityTree

* fixes to xpath generation and more evals (observe form fields)

---------

Co-authored-by: seanmcguire12 <[email protected]>
  • Loading branch information
miguelg719 and seanmcguire12 authored Jan 29, 2025
1 parent 8e84664 commit bbbcee7
Show file tree
Hide file tree
Showing 18 changed files with 943 additions and 139 deletions.
5 changes: 5 additions & 0 deletions .changeset/chilled-jokes-teach.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": minor
---

Observe got a major upgrade. It now returns a suggested Playwright method, with any necessary arguments, for each generated candidate element. It is also significantly faster when using a11y tree processing for context.
24 changes: 24 additions & 0 deletions evals/evals.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,30 @@
{
"name": "extract_zillow",
"categories": ["text_extract"]
},
{
"name": "observe_github",
"categories": ["observe"]
},
{
"name": "observe_vantechjournal",
"categories": ["observe"]
},
{
"name": "observe_amazon_add_to_cart",
"categories": ["observe"]
},
{
"name": "observe_simple_google_search",
"categories": ["observe"]
},
{
"name": "observe_yc_startup",
"categories": ["observe"]
},
{
"name": "observe_taxes",
"categories": ["observe"]
}
]
}
8 changes: 2 additions & 6 deletions evals/tasks/ionwave_observe.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
import { initStagehand } from "@/evals/initStagehand";
import { EvalFunction } from "@/types/evals";

export const ionwave_observe: EvalFunction = async ({
modelName,
logger,
useAccessibilityTree,
}) => {
export const ionwave_observe: EvalFunction = async ({ modelName, logger }) => {
const { stagehand, initResponse } = await initStagehand({
modelName,
logger,
Expand All @@ -15,7 +11,7 @@ export const ionwave_observe: EvalFunction = async ({

await stagehand.page.goto("https://elpasotexas.ionwave.net/Login.aspx");

const observations = await stagehand.page.observe({ useAccessibilityTree });
const observations = await stagehand.page.observe({ onlyVisible: true });

if (observations.length === 0) {
await stagehand.close();
Expand Down
75 changes: 75 additions & 0 deletions evals/tasks/observe_amazon_add_to_cart.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { EvalFunction } from "@/types/evals";
import { initStagehand } from "@/evals/initStagehand";
import { performPlaywrightMethod } from "@/lib/a11y/utils";

/**
 * Eval: drives an Amazon product page using `observe` with
 * `returnAction: true`, executing the first suggested action of each
 * observation through `performPlaywrightMethod`.
 *
 * The eval passes when, after both steps, the page URL starts with the
 * Amazon sign-in URL.
 *
 * @param modelName - Forwarded to `initStagehand`.
 * @param logger - Forwarded to `initStagehand`; its collected logs are returned.
 * @returns The success flag, the final URL, the session/debug URLs and the logs.
 */
export const observe_amazon_add_to_cart: EvalFunction = async ({
  modelName,
  logger,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Observes candidates for `instruction` and performs the first one, if any.
  const observeAndPerformFirst = async (instruction: string): Promise<void> => {
    const candidates = await stagehand.page.observe({
      instruction,
      onlyVisible: false,
      returnAction: true,
    });

    console.log(candidates);

    const [firstCandidate] = candidates;
    if (firstCandidate) {
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        firstCandidate.method,
        firstCandidate.arguments,
        // Strip only a leading "xpath=" prefix, never a match inside the path.
        firstCandidate.selector.replace(/^xpath=/, ""),
      );
    }
  };

  // Runs the browser steps and returns the final URL. The session is closed
  // in `finally` so that a failing step does not leak it.
  const runScenario = async (): Promise<string> => {
    try {
      await stagehand.page.goto(
        "https://www.amazon.com/Laptop-MacBook-Surface-Water-Resistant-Accessories/dp/B0D5M4H5CD",
      );
      await stagehand.page.waitForTimeout(5000);

      await observeAndPerformFirst("Find and click the 'Add to Cart' button");
      await stagehand.page.waitForTimeout(2000);

      await observeAndPerformFirst(
        "Find and click the 'Proceed to checkout' button",
      );
      await stagehand.page.waitForTimeout(2000);

      return stagehand.page.url();
    } finally {
      await stagehand.close();
    }
  };

  const currentUrl = await runScenario();
  const expectedUrlPrefix = "https://www.amazon.com/ap/signin";

  return {
    _success: currentUrl.startsWith(expectedUrlPrefix),
    currentUrl,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
92 changes: 92 additions & 0 deletions evals/tasks/observe_github.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import { initStagehand } from "@/evals/initStagehand";
import { EvalFunction } from "@/types/evals";

/**
 * Eval: asks `observe` for the scrollable element holding the repository file
 * tree on a GitHub page and checks whether any returned selector resolves to
 * the same DOM node as one of the known candidate locators.
 *
 * @param modelName - Forwarded to `initStagehand`.
 * @param logger - Forwarded to `initStagehand`; its collected logs are returned.
 * @returns The success flag, the matched locator (if any), the raw
 *   observations, the session/debug URLs and the logs.
 */
export const observe_github: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // `catch` variables are `unknown` under strict settings; narrow before use.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Runs the browser steps and returns the eval-specific result fields. The
  // session is closed in `finally` so that a failing step does not leak it.
  const evaluate = async () => {
    try {
      await stagehand.page.goto(
        "https://github.com/browserbase/stagehand/tree/main/lib",
      );

      const observations = await stagehand.page.observe({
        instruction:
          "find the scrollable element that holds the repos file tree",
      });

      if (observations.length === 0) {
        return { _success: false, observations };
      }

      const possibleLocators = [
        `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`,
        `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav`,
      ];

      const possibleHandles = [];
      for (const locatorStr of possibleLocators) {
        try {
          const handle = await stagehand.page.locator(locatorStr).elementHandle();
          if (handle) {
            possibleHandles.push({ locatorStr, handle });
          }
        } catch (error: unknown) {
          // elementHandle() throws when the locator cannot be resolved; one
          // stale candidate must not abort the whole eval.
          console.warn(
            `Failed to resolve candidate locator ${locatorStr}:`,
            describeError(error),
          );
        }
      }

      let foundMatch = false;
      let matchedLocator: string | null = null;

      for (const observation of observations) {
        try {
          const observationHandle = await stagehand.page
            .locator(observation.selector)
            .first()
            .elementHandle();
          if (!observationHandle) {
            continue;
          }

          for (const { locatorStr, handle: candidateHandle } of possibleHandles) {
            // Identity comparison runs in the page so both handles refer to
            // live DOM nodes.
            const isSameNode = await observationHandle.evaluate(
              (node, otherNode) => node === otherNode,
              candidateHandle,
            );
            if (isSameNode) {
              foundMatch = true;
              matchedLocator = locatorStr;
              break;
            }
          }

          if (foundMatch) {
            break;
          }
        } catch (error: unknown) {
          console.warn(
            `Failed to check observation with selector ${observation.selector}:`,
            describeError(error),
          );
        }
      }

      return { _success: foundMatch, matchedLocator, observations };
    } finally {
      await stagehand.close();
    }
  };

  const outcome = await evaluate();

  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
70 changes: 70 additions & 0 deletions evals/tasks/observe_simple_google_search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { EvalFunction } from "@/types/evals";
import { initStagehand } from "@/evals/initStagehand";
import { performPlaywrightMethod } from "@/lib/a11y/utils";

/**
 * Eval: performs a Google search using `observe` with `returnAction: true`,
 * executing the first suggested action of each observation through
 * `performPlaywrightMethod`.
 *
 * The eval passes when the final page URL starts with the expected search URL
 * for "OpenAI".
 *
 * @param modelName - Forwarded to `initStagehand`.
 * @param logger - Forwarded to `initStagehand`; its collected logs are returned.
 * @returns The success flag, the final URL, the session/debug URLs and the logs.
 */
export const observe_simple_google_search: EvalFunction = async ({
  modelName,
  logger,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Observes candidates for `instruction`, logs them, and performs the first
  // one, if any.
  const observeAndPerformFirst = async (instruction: string): Promise<void> => {
    const candidates = await stagehand.page.observe({
      instruction,
      onlyVisible: false,
      returnAction: true,
    });
    console.log(candidates);

    const [firstCandidate] = candidates;
    if (firstCandidate) {
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        firstCandidate.method,
        firstCandidate.arguments,
        // Strip only a leading "xpath=" prefix, never a match inside the path.
        firstCandidate.selector.replace(/^xpath=/, ""),
      );
    }
  };

  // Runs the browser steps and returns the final URL. The session is closed
  // in `finally` so that a failing step does not leak it.
  const runScenario = async (): Promise<string> => {
    try {
      await stagehand.page.goto("https://www.google.com");

      await observeAndPerformFirst("Find the search bar and enter 'OpenAI'");
      await stagehand.page.waitForTimeout(5000);

      await observeAndPerformFirst(
        "Click the search button in the suggestions dropdown",
      );
      await stagehand.page.waitForTimeout(5000);

      return stagehand.page.url();
    } finally {
      await stagehand.close();
    }
  };

  const expectedUrl = "https://www.google.com/search?q=OpenAI";
  const currentUrl = await runScenario();

  return {
    _success: currentUrl.startsWith(expectedUrl),
    currentUrl,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
76 changes: 76 additions & 0 deletions evals/tasks/observe_taxes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import { EvalFunction } from "@/types/evals";
import { initStagehand } from "@/evals/initStagehand";

/**
 * Eval: asks `observe` for the form elements under the 'Income' section of a
 * tax estimator page. It fails early when too few observations come back;
 * otherwise it passes when some observed element has the same inner text as
 * the `#tpWages` element.
 *
 * @param modelName - Forwarded to `initStagehand`.
 * @param logger - Forwarded to `initStagehand`; its collected logs are returned.
 * @returns The success flag, the expected text (when compared), the raw
 *   observations, the session/debug URLs and the logs.
 */
export const observe_taxes: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // NOTE(review): presumably the number of form fields in the 'Income'
  // section; confirm against the page.
  const minExpectedObservations = 13;

  // Runs the browser steps and returns the eval-specific result fields. The
  // session is closed in `finally` so that a failing step does not leak it.
  const evaluate = async () => {
    try {
      await stagehand.page.goto("https://file.1040.com/estimate/");

      const observations = await stagehand.page.observe({
        instruction: "Find all the form elements under the 'Income' section",
      });

      // This single check also covers the empty result.
      if (observations.length < minExpectedObservations) {
        return { _success: false, observations };
      }

      const expectedLocator = `#tpWages`;

      const expectedResult = await stagehand.page
        .locator(expectedLocator)
        .first()
        .innerText();

      let foundMatch = false;
      for (const observation of observations) {
        try {
          const observationResult = await stagehand.page
            .locator(observation.selector)
            .first()
            .innerText();

          if (observationResult === expectedResult) {
            foundMatch = true;
            break;
          }
        } catch (error: unknown) {
          // `catch` variables are `unknown` under strict settings; narrow
          // before reading `.message`.
          console.warn(
            `Failed to check observation with selector ${observation.selector}:`,
            error instanceof Error ? error.message : String(error),
          );
        }
      }

      return { _success: foundMatch, expected: expectedResult, observations };
    } finally {
      await stagehand.close();
    }
  };

  const outcome = await evaluate();

  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
Loading

0 comments on commit bbbcee7

Please sign in to comment.