Skip to content

Commit

Permalink
Align the messages sent for the browser agent with JSON-RPC conventions (#575)
Browse files Browse the repository at this point in the history
- Add a WebSocketMessageV2 type to hold the new message schema
- Update the browser extension service worker
- Update the preload script used for the inline browser
  • Loading branch information
hillary-mutisya authored Jan 18, 2025
1 parent 8311313 commit 52733e6
Show file tree
Hide file tree
Showing 9 changed files with 229 additions and 208 deletions.
71 changes: 39 additions & 32 deletions ts/packages/agents/browser/src/agent/actionHandler.mts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { WebSocketMessage, createWebSocket } from "common-utils/ws";
import { createWebSocket } from "common-utils/ws";
import { WebSocket } from "ws";
import {
ActionContext,
Expand Down Expand Up @@ -94,25 +94,37 @@ async function updateBrowserContext(
};
webSocket.addEventListener("message", async (event: any) => {
const text = event.data.toString();
const data = JSON.parse(text) as WebSocketMessage;
const data = JSON.parse(text);
if (isWebAgentMessage(data)) {
await processWebAgentMessage(data, context);
return;
}

if (data.target !== "dispatcher" || data.source !== "browser") {
if (data.error) {
console.error(data.error);
// TODO: Handle the case where no clients were found. Prompt the user
// to launch inline browser or run automation in the headless browser.
return;
}

if (data.body) {
switch (data.messageType) {
if (data.method) {
switch (data.method) {
case "enableSiteTranslator": {
if (data.body == "browser.crossword") {
const targetTranslator = data.params.translator;
if (targetTranslator == "browser.crossword") {
// initialize crossword state
sendSiteTranslatorStatus(data.body, "initializing", context);
sendSiteTranslatorStatus(
targetTranslator,
"initializing",
context,
);
context.agentContext.crossWordState =
await getBoardSchema(context);
sendSiteTranslatorStatus(data.body, "initialized", context);
sendSiteTranslatorStatus(
targetTranslator,
"initialized",
context,
);

if (context.agentContext.crossWordState) {
context.notify(
Expand All @@ -126,26 +138,26 @@ async function updateBrowserContext(
);
}
}
await context.toggleTransientAgent(data.body, true);
await context.toggleTransientAgent(targetTranslator, true);
break;
}
case "disableSiteTranslator": {
await context.toggleTransientAgent(data.body, false);
const targetTranslator = data.params.translator;
await context.toggleTransientAgent(targetTranslator, false);
break;
}
case "browserActionResponse": {
break;
}
case "debugBrowserAction": {
await executeBrowserAction(
data.body,
context as unknown as ActionContext<BrowserActionContext>,
case "addTabIdToIndex":
case "deleteTabIdFromIndex":
case "getTabIdFromIndex":
case "resetTabIdToIndex": {
await handleTabIndexActions(
{
actionName: data.method,
parameters: data.params,
},
context,
data.id,
);

break;
}
case "tabIndexRequest": {
await handleTabIndexActions(data.body, context, data.id);
break;
}
}
Expand Down Expand Up @@ -178,9 +190,9 @@ async function executeBrowserAction(
try {
context.actionIO.setDisplay("Running remote action.");

let messageType = "browserActionRequest";
let schemaName = "browser";
if (action.translatorName === "browser.paleoBioDb") {
messageType = "browserActionRequest.paleoBioDb";
schemaName = "browser.paleoBioDb";
} else if (action.translatorName === "browser.crossword") {
const crosswordResult = await handleCrosswordAction(action, context);
return createActionResult(crosswordResult);
Expand All @@ -199,7 +211,7 @@ async function executeBrowserAction(
// return createActionResult(instacartResult);
}

await connector?.sendActionToBrowser(action, messageType);
await connector?.sendActionToBrowser(action, schemaName);
} catch (ex: any) {
if (ex instanceof Error) {
console.error(ex);
Expand All @@ -226,9 +238,7 @@ function sendSiteTranslatorStatus(
if (webSocketEndpoint) {
webSocketEndpoint.send(
JSON.stringify({
source: "dispatcher",
target: "browser",
messageType: "siteTranslatorStatus",
method: "browser/siteTranslatorStatus",
id: callId,
body: {
translator: translatorName,
Expand Down Expand Up @@ -288,11 +298,8 @@ async function handleTabIndexActions(

webSocketEndpoint.send(
JSON.stringify({
source: "dispatcher",
target: "browser",
messageType: "tabIndexResponse",
id: requestId,
body: responseBody,
result: responseBody,
}),
);
} catch (ex: any) {
Expand Down
39 changes: 12 additions & 27 deletions ts/packages/agents/browser/src/agent/browserConnector.mts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { WebSocketMessage } from "common-utils";
import { AppAction, SessionContext } from "@typeagent/agent-sdk";
import { BrowserActionContext } from "./actionHandler.mjs";

Expand All @@ -12,44 +11,36 @@ export class BrowserConnector {
this.webSocket = context.agentContext.webSocket;
}

async sendActionToBrowser(action: AppAction, messageType?: string) {
async sendActionToBrowser(action: AppAction, schemaName?: string) {
return new Promise<any | undefined>((resolve, reject) => {
if (this.webSocket) {
try {
const callId = new Date().getTime().toString();
if (!messageType) {
messageType = "browserActionRequest";
if (!schemaName) {
schemaName = "browser";
}

this.webSocket.send(
JSON.stringify({
source: "dispatcher",
target: "browser",
messageType: messageType,
id: callId,
body: action,
method: `${schemaName}/${action.actionName}`,
params: action.parameters,
}),
);

const handler = (event: any) => {
const text = event.data.toString();
const data = JSON.parse(text) as WebSocketMessage;
if (
data.target == "dispatcher" &&
data.source == "browser" &&
data.messageType == "browserActionResponse" &&
data.id == callId &&
data.body
) {
const data = JSON.parse(text);
if (data.id == callId && data.result) {
this.webSocket.removeEventListener("message", handler);
resolve(data.body);
resolve(data.result);
}
};

this.webSocket.addEventListener("message", handler);
} catch {
console.log("Unable to contact browser backend.");
reject("Unable to contact browser backend.");
reject("Unable to contact browser backend (from connector).");
}
} else {
throw new Error("No websocket connection.");
Expand All @@ -59,10 +50,7 @@ export class BrowserConnector {

private async getPageDataFromBrowser(action: any) {
return new Promise<string | undefined>(async (resolve, reject) => {
const response = await this.sendActionToBrowser(
action,
"browserActionRequest",
);
const response = await this.sendActionToBrowser(action, "browser");
if (response.data) {
resolve(response.data);
} else {
Expand Down Expand Up @@ -174,7 +162,7 @@ export class BrowserConnector {
},
};

return this.sendActionToBrowser(schemaAction, "browserActionRequest");
return this.sendActionToBrowser(schemaAction, "browser");
}

async getPageUrl() {
Expand Down Expand Up @@ -214,10 +202,7 @@ export class BrowserConnector {
actionName: "awaitPageLoad",
};

const actionPromise = this.sendActionToBrowser(
action,
"browserActionRequest",
);
const actionPromise = this.sendActionToBrowser(action, "browser");
if (timeout) {
const timeoutPromise = new Promise((f) => setTimeout(f, timeout));
return Promise.race([actionPromise, timeoutPromise]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ export async function handleInstacartAction(
.findPageComponent("ShoppingCartDetails")
.thenRun(async (context) => {
const cartDetails = context["ShoppingCartDetails"];
console.log(cartDetails);
// console.log(cartDetails);

entities.push({
name: cartDetails.storeName,
Expand Down
Loading

0 comments on commit 52733e6

Please sign in to comment.