-
Notifications
You must be signed in to change notification settings - Fork 347
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* flagged return action v0.1 * isVisible flag added to observe * prettier and cleanup of fs functions * simplifying observe flagging * two flags: useAccessibilityTree and returnAction. No more processAlDOM on a11y context * github eval * add back in a11y param (whoops) * google search observe eval (returnAction) * fix my terrible grammar in the instruction * amazon actionable eval * add gh eval to config * vtj eval * added evals to config.json * fixing lint/build issues * compare element handles * yc eval * changed useAccessibilityTree to onlyVisible * removing useAccessibilityTree from evals * mostly removing comments * accept multiple selectors * added changeset * return action defaults to false, waitForSettledDom before getAccessibilityTree * fixes to xpath generation and more evals (observe form fields) --------- Co-authored-by: seanmcguire12 <[email protected]>
- Loading branch information
1 parent
8e84664
commit bbbcee7
Showing
18 changed files
with
943 additions
and
139 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"@browserbasehq/stagehand": minor | ||
--- | ||
|
||
`observe` got a major upgrade: it now returns a suggested Playwright method, along with any necessary arguments, for each generated candidate element. It also includes a major speedup when using a11y tree processing for context. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import { EvalFunction } from "@/types/evals"; | ||
import { initStagehand } from "@/evals/initStagehand"; | ||
import { performPlaywrightMethod } from "@/lib/a11y/utils"; | ||
|
||
/**
 * Eval: drives an Amazon product page through "Add to Cart" and then
 * "Proceed to checkout" using `observe` with `returnAction: true`, executing
 * the first suggested action of each observation via `performPlaywrightMethod`.
 *
 * The eval succeeds when the final page URL starts with the Amazon sign-in
 * URL prefix.
 *
 * The Stagehand session is closed in a `finally` block so it is released even
 * when navigation, observation, or the performed action throws; the error
 * itself still propagates to the caller.
 *
 * @param modelName - Model identifier forwarded to `initStagehand`.
 * @param logger - Eval logger forwarded to `initStagehand`; its collected logs
 *   are returned in the result.
 * @returns The eval result: `_success`, the final `currentUrl`, the session's
 *   `debugUrl` / `sessionUrl`, and the collected `logs`.
 */
export const observe_amazon_add_to_cart: EvalFunction = async ({
  modelName,
  logger,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Runs `observe` with the options shared by both steps of this eval.
  const observeActions = (instruction: string) =>
    stagehand.page.observe({
      instruction,
      onlyVisible: false,
      returnAction: true,
    });

  // Example of using performPlaywrightMethod if you have the xpath:
  // executes the first observed action, or does nothing if there is none.
  const performFirstAction = async (
    observations: Awaited<ReturnType<typeof observeActions>>,
  ): Promise<void> => {
    const [action] = observations;
    if (!action) {
      return;
    }
    await performPlaywrightMethod(
      stagehand.page,
      stagehand.logger,
      action.method,
      action.arguments,
      action.selector.replace("xpath=", ""),
    );
  };

  let currentUrl: string;
  try {
    await stagehand.page.goto(
      "https://www.amazon.com/Laptop-MacBook-Surface-Water-Resistant-Accessories/dp/B0D5M4H5CD",
    );

    await stagehand.page.waitForTimeout(5000);

    const observations1 = await observeActions(
      "Find and click the 'Add to Cart' button",
    );
    console.log(observations1);
    await performFirstAction(observations1);

    await stagehand.page.waitForTimeout(2000);

    const observations2 = await observeActions(
      "Find and click the 'Proceed to checkout' button",
    );
    await performFirstAction(observations2);

    await stagehand.page.waitForTimeout(2000);

    currentUrl = stagehand.page.url();
  } finally {
    // Always release the browser session, even if a step above threw.
    await stagehand.close();
  }

  const expectedUrlPrefix = "https://www.amazon.com/ap/signin";

  return {
    _success: currentUrl.startsWith(expectedUrlPrefix),
    currentUrl,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import { initStagehand } from "@/evals/initStagehand"; | ||
import { EvalFunction } from "@/types/evals"; | ||
|
||
/**
 * Eval: asks `observe` for the scrollable element holding the repository file
 * tree on a GitHub page and checks whether any returned selector resolves to
 * the same DOM node as one of the known-good CSS locators.
 *
 * Node identity is compared inside the page (`node === otherNode`) so that
 * differently written selectors pointing at the same element still match.
 *
 * The Stagehand session is closed in a `finally` block so it is released even
 * when a step throws; the error itself still propagates to the caller.
 *
 * @param modelName - Model identifier forwarded to `initStagehand`.
 * @param logger - Eval logger forwarded to `initStagehand`; its collected logs
 *   are returned in the result.
 * @returns The eval result: `_success`, the `observations`, the session's
 *   `debugUrl` / `sessionUrl`, the collected `logs`, and — when at least one
 *   observation was returned — `matchedLocator` (the matching expected
 *   locator, or `null` if none matched).
 */
export const observe_github: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Caught values are not guaranteed to be Error instances.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Resolves an expected locator to an element handle, or null if it cannot
  // be resolved. NOTE(review): elementHandle() appears to reject (e.g. on
  // timeout) rather than return null when nothing matches — confirm against
  // the Playwright docs; either way a miss should not abort the eval.
  const resolveExpectedHandle = async (locatorStr: string) => {
    try {
      return await stagehand.page.locator(locatorStr).elementHandle();
    } catch (error: unknown) {
      console.warn(
        `Failed to resolve expected locator ${locatorStr}:`,
        describeError(error),
      );
      return null;
    }
  };

  // Everything that needs the live session; returns the eval-specific part of
  // the result so the session can be closed before logs are collected.
  const evaluate = async () => {
    await stagehand.page.goto(
      "https://github.com/browserbase/stagehand/tree/main/lib",
    );

    const observations = await stagehand.page.observe({
      instruction: "find the scrollable element that holds the repos file tree",
    });

    if (observations.length === 0) {
      return { _success: false, observations };
    }

    const possibleLocators = [
      `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`,
      `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav`,
    ];

    const possibleHandles = [];
    for (const locatorStr of possibleLocators) {
      const handle = await resolveExpectedHandle(locatorStr);
      if (handle) {
        possibleHandles.push({ locatorStr, handle });
      }
    }

    let foundMatch = false;
    let matchedLocator: string | null = null;

    for (const observation of observations) {
      try {
        const observationHandle = await stagehand.page
          .locator(observation.selector)
          .first()
          .elementHandle();
        if (!observationHandle) {
          continue;
        }

        for (const { locatorStr, handle: candidateHandle } of possibleHandles) {
          const isSameNode = await observationHandle.evaluate(
            (node, otherNode) => node === otherNode,
            candidateHandle,
          );
          if (isSameNode) {
            foundMatch = true;
            matchedLocator = locatorStr;
            break;
          }
        }

        if (foundMatch) {
          break;
        }
      } catch (error: unknown) {
        // A bad selector from the model should not abort the whole eval;
        // report it and move on to the next observation.
        console.warn(
          `Failed to check observation with selector ${observation.selector}:`,
          describeError(error),
        );
      }
    }

    return { _success: foundMatch, matchedLocator, observations };
  };

  let outcome: Awaited<ReturnType<typeof evaluate>>;
  try {
    outcome = await evaluate();
  } finally {
    // Always release the browser session, even if a step above threw.
    await stagehand.close();
  }

  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import { EvalFunction } from "@/types/evals"; | ||
import { initStagehand } from "@/evals/initStagehand"; | ||
import { performPlaywrightMethod } from "@/lib/a11y/utils"; | ||
|
||
/**
 * Eval: performs a Google search for "OpenAI" using `observe` with
 * `returnAction: true`, executing the first suggested action of each
 * observation via `performPlaywrightMethod` — first to fill the search bar,
 * then to click the search button in the suggestions dropdown.
 *
 * The eval succeeds when the final page URL starts with
 * `https://www.google.com/search?q=OpenAI`.
 *
 * The Stagehand session is closed in a `finally` block so it is released even
 * when navigation, observation, or the performed action throws; the error
 * itself still propagates to the caller.
 *
 * @param modelName - Model identifier forwarded to `initStagehand`.
 * @param logger - Eval logger forwarded to `initStagehand`; its collected logs
 *   are returned in the result.
 * @returns The eval result: `_success`, the final `currentUrl`, the session's
 *   `debugUrl` / `sessionUrl`, and the collected `logs`.
 */
export const observe_simple_google_search: EvalFunction = async ({
  modelName,
  logger,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Runs `observe` with the options shared by both steps of this eval.
  const observeActions = (instruction: string) =>
    stagehand.page.observe({
      instruction,
      onlyVisible: false,
      returnAction: true,
    });

  // Executes the first observed action, or does nothing if there is none.
  const performFirstAction = async (
    observations: Awaited<ReturnType<typeof observeActions>>,
  ): Promise<void> => {
    const [action] = observations;
    if (!action) {
      return;
    }
    await performPlaywrightMethod(
      stagehand.page,
      stagehand.logger,
      action.method,
      action.arguments,
      action.selector.replace("xpath=", ""),
    );
  };

  let currentUrl: string;
  try {
    await stagehand.page.goto("https://www.google.com");

    const observation1 = await observeActions(
      "Find the search bar and enter 'OpenAI'",
    );
    console.log(observation1);
    await performFirstAction(observation1);

    await stagehand.page.waitForTimeout(5000);

    const observation2 = await observeActions(
      "Click the search button in the suggestions dropdown",
    );
    console.log(observation2);
    await performFirstAction(observation2);

    await stagehand.page.waitForTimeout(5000);

    currentUrl = stagehand.page.url();
  } finally {
    // Always release the browser session, even if a step above threw.
    await stagehand.close();
  }

  const expectedUrl = "https://www.google.com/search?q=OpenAI";

  return {
    _success: currentUrl.startsWith(expectedUrl),
    currentUrl,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import { EvalFunction } from "@/types/evals"; | ||
import { initStagehand } from "@/evals/initStagehand"; | ||
|
||
/**
 * Eval: asks `observe` for all form elements under the "Income" section of
 * the 1040.com estimate page and checks that (a) at least
 * `minExpectedObservations` elements are returned and (b) one of the returned
 * selectors resolves to an element whose inner text equals that of the
 * `#tpWages` element.
 *
 * The Stagehand session is closed in a `finally` block so it is released even
 * when a step throws; the error itself still propagates to the caller.
 *
 * @param modelName - Model identifier forwarded to `initStagehand`.
 * @param logger - Eval logger forwarded to `initStagehand`; its collected logs
 *   are returned in the result.
 * @returns The eval result: `_success`, the `observations`, the session's
 *   `debugUrl` / `sessionUrl`, the collected `logs`, and — when enough
 *   observations were returned — `expected` (the inner text of `#tpWages`).
 */
export const observe_taxes: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Fewer observations than this (including none at all) fails the eval.
  const minExpectedObservations = 13;

  // Caught values are not guaranteed to be Error instances.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Everything that needs the live session; returns the eval-specific part of
  // the result so the session can be closed before logs are collected.
  const evaluate = async () => {
    await stagehand.page.goto("https://file.1040.com/estimate/");

    const observations = await stagehand.page.observe({
      instruction: "Find all the form elements under the 'Income' section",
    });

    if (observations.length < minExpectedObservations) {
      return { _success: false, observations };
    }

    const expectedLocator = `#tpWages`;

    const expectedResult = await stagehand.page
      .locator(expectedLocator)
      .first()
      .innerText();

    let foundMatch = false;
    for (const observation of observations) {
      try {
        const observationResult = await stagehand.page
          .locator(observation.selector)
          .first()
          .innerText();

        if (observationResult === expectedResult) {
          foundMatch = true;
          break;
        }
      } catch (error: unknown) {
        // A bad selector from the model should not abort the whole eval;
        // report it and move on to the next observation.
        console.warn(
          `Failed to check observation with selector ${observation.selector}:`,
          describeError(error),
        );
      }
    }

    return { _success: foundMatch, expected: expectedResult, observations };
  };

  let outcome: Awaited<ReturnType<typeof evaluate>>;
  try {
    outcome = await evaluate();
  } finally {
    // Always release the browser session, even if a step above threw.
    await stagehand.close();
  }

  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
Oops, something went wrong.