Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Observe perform candidates #426

Merged
merged 26 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
555f54c
flagged return action v0.1
miguelg719 Jan 21, 2025
d64a1a8
isVisible flag added to observe
miguelg719 Jan 23, 2025
cd8a4f2
prettier and cleanup of fs functions
miguelg719 Jan 23, 2025
19f92aa
simplifying observe flagging
miguelg719 Jan 27, 2025
4b8c728
two flags: useAccessibilityTree and returnAction. No more processAlDO…
miguelg719 Jan 27, 2025
de526e5
github eval
seanmcguire12 Jan 27, 2025
a8e4426
add back in a11y param (whoops)
seanmcguire12 Jan 27, 2025
e671aba
google search observe eval (returnAction)
miguelg719 Jan 27, 2025
dd35199
fix my terrible grammar in the instruction
seanmcguire12 Jan 27, 2025
f7d114c
Merge remote-tracking branch 'origin/observe_perform_candidates' into…
seanmcguire12 Jan 27, 2025
f402619
amazon actionable eval
miguelg719 Jan 27, 2025
03c8f41
Merge remote-tracking branch 'origin/main' into observe_perform_candi…
seanmcguire12 Jan 27, 2025
8b78b1a
Merge remote-tracking branch 'origin/observe_perform_candidates' into…
seanmcguire12 Jan 27, 2025
ddfcc1e
add gh eval to config
seanmcguire12 Jan 27, 2025
8dbe06f
vtj eval
seanmcguire12 Jan 27, 2025
c66449b
added evals to config.json
miguelg719 Jan 28, 2025
f6039d2
fixing lint/build issues
miguelg719 Jan 28, 2025
b848d8f
compare element handles
seanmcguire12 Jan 28, 2025
4f36762
yc eval
seanmcguire12 Jan 28, 2025
aee3681
changed useAccessibilityTree to onlyVisible
miguelg719 Jan 28, 2025
a83705e
removing useAccessibilityTree from evals
miguelg719 Jan 28, 2025
299eebf
mostly removing comments
miguelg719 Jan 28, 2025
3e6ff43
accept multiple selectors
seanmcguire12 Jan 28, 2025
a3261c7
added changeset
miguelg719 Jan 28, 2025
9115729
return action defaults to false, waitForSettledDom before getAccessib…
miguelg719 Jan 28, 2025
9776827
fixes to xpath generation and more evals (observe form fields)
miguelg719 Jan 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions evals/evals.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,22 @@
{
"name": "extract_zillow",
"categories": ["text_extract"]
},
{
"name": "observe_github",
"categories": ["observe"]
},
{
"name": "observe_vantechjournal",
"categories": ["observe"]
},
{
"name": "observe_amazon_add_to_cart",
"categories": ["observe"]
},
{
"name": "observe_simple_google_search",
"categories": ["observe"]
}
]
}
75 changes: 75 additions & 0 deletions evals/tasks/observe_amazon_add_to_cart.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { EvalFunction } from "@/types/evals";
import { initStagehand } from "@/evals/initStagehand";
import { performPlaywrightMethod } from "@/lib/a11y/utils";

/**
 * Eval: use `observe` (with `returnAction: true`) to locate the "Add to Cart"
 * and "Proceed to checkout" buttons on an Amazon product page, and execute the
 * first suggested action for each via `performPlaywrightMethod`.
 *
 * The eval succeeds when the page ends up on a URL starting with the Amazon
 * sign-in prefix.
 *
 * The Stagehand session is closed in a `finally` block so that a failure in
 * navigation, observation, or action execution does not leave it open; the
 * error itself still propagates to the caller.
 */
export const observe_amazon_add_to_cart: EvalFunction = async ({
  modelName,
  logger,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  let currentUrl: string;
  try {
    await stagehand.page.goto(
      "https://www.amazon.com/Laptop-MacBook-Surface-Water-Resistant-Accessories/dp/B0D5M4H5CD",
    );

    // Fixed pause before observing the freshly loaded product page.
    await stagehand.page.waitForTimeout(5000);

    const observations1 = await stagehand.page.observe({
      instruction: "Find and click the 'Add to Cart' button",
      useAccessibilityTree: true,
      returnAction: true,
    });

    console.log(observations1);

    // Execute the first candidate action, if any. The "xpath=" prefix is
    // stripped from the selector before it is handed to the helper.
    const action1 = observations1[0];
    if (action1) {
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        action1.method,
        action1.arguments,
        action1.selector.replace("xpath=", ""),
      );
    }

    await stagehand.page.waitForTimeout(2000);

    const observations2 = await stagehand.page.observe({
      instruction: "Find and click the 'Proceed to checkout' button",
      useAccessibilityTree: true,
      returnAction: true,
    });

    // Same as above: perform the first candidate action for the checkout step.
    const action2 = observations2[0];
    if (action2) {
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        action2.method,
        action2.arguments,
        action2.selector.replace("xpath=", ""),
      );
    }
    await stagehand.page.waitForTimeout(2000);

    currentUrl = stagehand.page.url();
  } finally {
    // Always release the session, even when a step above throws.
    await stagehand.close();
  }

  const expectedUrlPrefix = "https://www.amazon.com/ap/signin";

  return {
    _success: currentUrl.startsWith(expectedUrlPrefix),
    currentUrl,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
74 changes: 74 additions & 0 deletions evals/tasks/observe_github.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import { initStagehand } from "@/evals/initStagehand";
import { EvalFunction } from "@/types/evals";

/**
 * Eval: ask `observe` for the scrollable element holding the repository file
 * tree on a GitHub page, then check whether any returned selector resolves to
 * an element whose `innerText` equals that of a known-good CSS locator.
 *
 * Returns `_success: false` when `observe` yields no candidates or when no
 * candidate's text matches the expected element's text.
 *
 * The Stagehand session is closed in a `finally` block so that a failure
 * (e.g. the expected locator not resolving) does not leave it open; such
 * errors still propagate to the caller.
 */
export const observe_github: EvalFunction = async ({
  modelName,
  logger,
  useAccessibilityTree,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Runs the browser-dependent part of the eval and guarantees the session is closed afterwards.
  const evaluateObservations = async () => {
    try {
      await stagehand.page.goto(
        "https://github.com/browserbase/stagehand/tree/main/lib",
      );

      const observations = await stagehand.page.observe({
        instruction:
          "find the scrollable element that holds the repos file tree",
        useAccessibilityTree,
      });

      if (observations.length === 0) {
        return { _success: false, observations };
      }

      const expectedLocator = `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`;

      const expectedResult = await stagehand.page
        .locator(expectedLocator)
        .first()
        .innerText();

      let foundMatch = false;
      for (const observation of observations) {
        try {
          const observationResult = await stagehand.page
            .locator(observation.selector)
            .first()
            .innerText();

          if (observationResult === expectedResult) {
            foundMatch = true;
            break;
          }
        } catch (error) {
          // A selector that cannot be resolved is not fatal; warn and try the
          // next candidate. Narrow the caught value before reading `.message`.
          console.warn(
            `Failed to check observation with selector ${observation.selector}:`,
            error instanceof Error ? error.message : String(error),
          );
        }
      }

      return {
        _success: foundMatch,
        expected: expectedResult,
        observations,
      };
    } finally {
      // Always release the session, even when a step above throws.
      await stagehand.close();
    }
  };

  const outcome = await evaluateObservations();

  // Logs are collected after the session has been closed.
  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
70 changes: 70 additions & 0 deletions evals/tasks/observe_simple_google_search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { EvalFunction } from "@/types/evals";
import { initStagehand } from "@/evals/initStagehand";
import { performPlaywrightMethod } from "@/lib/a11y/utils";

/**
 * Eval: drive a Google search using only `observe` (with `returnAction: true`)
 * plus `performPlaywrightMethod` — first to fill the search bar with "OpenAI",
 * then to click the search button in the suggestions dropdown.
 *
 * The eval succeeds when the final URL starts with the expected search URL.
 *
 * The Stagehand session is closed in a `finally` block so that a failure in
 * navigation, observation, or action execution does not leave it open; the
 * error itself still propagates to the caller.
 */
export const observe_simple_google_search: EvalFunction = async ({
  modelName,
  logger,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  let currentUrl: string;
  try {
    await stagehand.page.goto("https://www.google.com");

    const observation1 = await stagehand.page.observe({
      instruction: "Find the search bar and enter 'OpenAI'",
      useAccessibilityTree: true,
      returnAction: true,
    });
    console.log(observation1);

    // Execute the first candidate action, if any. The "xpath=" prefix is
    // stripped from the selector before it is handed to the helper.
    const action1 = observation1[0];
    if (action1) {
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        action1.method,
        action1.arguments,
        action1.selector.replace("xpath=", ""),
      );
    }
    // Fixed pause before observing the page again.
    await stagehand.page.waitForTimeout(5000);

    const observation2 = await stagehand.page.observe({
      instruction: "Click the search button in the suggestions dropdown",
      useAccessibilityTree: true,
      returnAction: true,
    });
    console.log(observation2);

    const action2 = observation2[0];
    if (action2) {
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        action2.method,
        action2.arguments,
        action2.selector.replace("xpath=", ""),
      );
    }
    await stagehand.page.waitForTimeout(5000);

    currentUrl = stagehand.page.url();
  } finally {
    // Always release the session, even when a step above throws.
    await stagehand.close();
  }

  const expectedUrl = "https://www.google.com/search?q=OpenAI";

  return {
    _success: currentUrl.startsWith(expectedUrl),
    currentUrl,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
72 changes: 72 additions & 0 deletions evals/tasks/observe_vantechjournal.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { initStagehand } from "@/evals/initStagehand";
import { EvalFunction } from "@/types/evals";

/**
 * Eval: ask `observe` for the pagination button leading to the 11th page of
 * the Vantech Journal archive, then check whether any returned selector
 * resolves to an element whose `innerText` equals that of a known-good CSS
 * locator.
 *
 * Returns `_success: false` when `observe` yields no candidates or when no
 * candidate's text matches the expected element's text.
 *
 * The Stagehand session is closed in a `finally` block so that a failure
 * (e.g. the expected locator not resolving) does not leave it open; such
 * errors still propagate to the caller.
 */
export const observe_vantechjournal: EvalFunction = async ({
  modelName,
  logger,
  useAccessibilityTree,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Runs the browser-dependent part of the eval and guarantees the session is closed afterwards.
  const evaluateObservations = async () => {
    try {
      await stagehand.page.goto("https://vantechjournal.com/archive?page=8");

      const observations = await stagehand.page.observe({
        instruction: "find the button that takes us to the 11th page",
        useAccessibilityTree,
      });

      if (observations.length === 0) {
        return { _success: false, observations };
      }

      const expectedLocator = `a.rounded-lg:nth-child(8)`;

      const expectedResult = await stagehand.page
        .locator(expectedLocator)
        .first()
        .innerText();

      let foundMatch = false;
      for (const observation of observations) {
        try {
          const observationResult = await stagehand.page
            .locator(observation.selector)
            .first()
            .innerText();

          if (observationResult === expectedResult) {
            foundMatch = true;
            break;
          }
        } catch (error) {
          // A selector that cannot be resolved is not fatal; warn and try the
          // next candidate. Narrow the caught value before reading `.message`.
          console.warn(
            `Failed to check observation with selector ${observation.selector}:`,
            error instanceof Error ? error.message : String(error),
          );
        }
      }

      return {
        _success: foundMatch,
        expected: expectedResult,
        observations,
      };
    } finally {
      // Always release the session, even when a step above throws.
      await stagehand.close();
    }
  };

  const outcome = await evaluateObservations();

  // Logs are collected after the session has been closed.
  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
6 changes: 4 additions & 2 deletions lib/StagehandPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -471,12 +471,13 @@ export class StagehandPage {
: instructionOrOptions || {};

const {
instruction = "Find actions that can be performed on this page.",
miguelg719 marked this conversation as resolved.
Show resolved Hide resolved
instruction,
modelName,
modelClientOptions,
useVision, // still destructure but will not pass it on
domSettleTimeoutMs,
useAccessibilityTree = false,
returnAction = true,
useAccessibilityTree = true,
} = options;

if (typeof useVision !== "undefined") {
Expand Down Expand Up @@ -523,6 +524,7 @@ export class StagehandPage {
llmClient,
requestId,
domSettleTimeoutMs,
returnAction,
miguelg719 marked this conversation as resolved.
Show resolved Hide resolved
useAccessibilityTree,
})
.catch((e) => {
Expand Down
Loading
Loading