-
Notifications
You must be signed in to change notification settings - Fork 188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Linux support #45
base: main
Are you sure you want to change the base?
Conversation
Related to bytedance#9 Add Linux support, including Wayland and X11, to the project. * **Environment Detection**: - Add `isWayland` constant to check if the display server is Wayland in `src/main/env.ts`. - Update `isLinux` constant to differentiate between Wayland and X11 in `src/main/env.ts`. * **Device Handling**: - Import `isWayland` in `src/main/agent/device.ts`. - Add condition to handle Wayland-specific functionalities using `@nut-tree/nut.js` in `src/main/agent/device.ts`. - Update `screenshot` method to handle Wayland-specific functionalities in `src/main/agent/device.ts`. * **Input Controls**: - Import `isWayland` in `src/main/agent/execute.ts`. - Add condition to handle Wayland-specific input controls in `src/main/agent/execute.ts`. * **Documentation**: - Add installation and usage instructions for Linux, including Wayland and X11, in `README.md`.
Could you please add an Electron Linux version(using @electron-forge/maker-deb)? |
* **Wayland Detection**: Modify `isWayland` constant in `src/main/env.ts` to correctly detect Wayland environments using `XDG_SESSION_TYPE`. * **Forge Configuration**: Update `forge.config.ts` to include `@electron-forge/maker-deb` maker and add configuration for it. * **Electron Builder Configuration**: Specify `deb` target under the `linux` section in `electron-builder.yml` and add configuration for `deb` target.
}); | ||
const primarySource = sources[0]; | ||
const screenshot = primarySource.thumbnail; | ||
if (isWayland) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there any differences in the screenshot methods depending on `isWayland`?
```python | ||
import pyautogui | ||
print(pyautogui.size()) # Should output your screen resolution | ||
``` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The operational control framework utilized is https://nutjs.dev/, and there is no need to install pyautogui here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The operational control framework utilized is https://nutjs.dev/, and there is no need to install pyautogui here.
`nut.js` doesn't support Wayland (see here), so maybe he wants to use `pyautogui` to handle operations in Wayland? But he hasn't implemented the logic yet.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is `pyautogui` not implemented at the execution layer because nutjs uses `pyautogui`?
UI-TARS-desktop/src/main/agent/execute.ts
Lines 63 to 256 in 3a5096f
/**
 * Performs a single predicted GUI action on the local machine.
 *
 * The action's `start_box` (and `end_box` for drags) is converted to screen
 * coordinates with `parseBoxToScreenCoordsWithScaleFactor`, then the action is
 * dispatched on `prediction.action_type` to the mouse / keyboard helpers.
 *
 * @param executeParams - `prediction` (with `action_type` and `action_inputs`),
 *   `screenWidth`, `screenHeight`, an optional `logger` (defaults to `console`)
 *   and an optional `scaleFactor` (defaults to `1`).
 * @returns A promise that resolves once the action has been carried out.
 *   Unsupported action types and unsupported scroll directions are logged as
 *   warnings and otherwise ignored.
 */
export const execute = async (executeParams: ExecuteParams) => {
  const {
    prediction,
    screenWidth,
    screenHeight,
    logger = console,
    scaleFactor = 1,
  } = executeParams;

  logger.info(
    '[execute] executeParams',
    JSON.stringify({
      scaleFactor,
      prediction,
      screenWidth,
      screenHeight,
    }),
  );

  const { action_type, action_inputs } = prediction;
  const startBoxStr = action_inputs?.start_box || '';

  logger.info('[execute] action_type', action_type, 'startBoxStr', startBoxStr);

  const { x: startX, y: startY } = parseBoxToScreenCoordsWithScaleFactor({
    boxStr: startBoxStr,
    screenWidth,
    screenHeight,
    scaleFactor,
  });

  logger.info(`[execute] [Position] (${startX}, ${startY})`);

  // execute configs
  mouse.config.mouseSpeed = 1500;

  // if (startBoxStr) {
  //   const region = await nutScreen.highlight(
  //     new Region(startX, startY, 100, 100),
  //   );
  //   logger.info('[execute] [Region]', region);
  // }

  switch (action_type) {
    case 'wait':
      logger.info('[device] wait', action_inputs);
      await sleep(1000);
      break;

    case 'mouse_move':
    case 'hover':
      logger.info('[device] mouse_move');
      await moveStraightTo(startX, startY);
      break;

    case 'click':
    case 'left_click':
    case 'left_single':
      logger.info('[device] left_click');
      await moveStraightTo(startX, startY);
      await sleep(100);
      await mouse.click(Button.LEFT);
      break;

    case 'left_double':
    case 'double_click':
      logger.info(`[device] ${action_type}(${startX}, ${startY})`);
      await moveStraightTo(startX, startY);
      await sleep(100);
      await mouse.doubleClick(Button.LEFT);
      break;

    case 'right_click':
    case 'right_single':
      logger.info('[device] right_click');
      await moveStraightTo(startX, startY);
      await sleep(100);
      await mouse.click(Button.RIGHT);
      break;

    case 'middle_click':
      logger.info('[device] middle_click');
      await moveStraightTo(startX, startY);
      await mouse.click(Button.MIDDLE);
      break;

    case 'left_click_drag':
    case 'drag':
    case 'select': {
      logger.info('[device] drag', action_inputs);
      // end_box
      if (action_inputs?.end_box) {
        const { x: endX, y: endY } = parseBoxToScreenCoordsWithScaleFactor({
          boxStr: action_inputs.end_box,
          screenWidth,
          screenHeight,
          scaleFactor,
        });

        // NOTE(review): this truthiness check also rejects a coordinate of 0.
        // Kept as-is because the helper's "no box" return value is not visible
        // here — confirm before tightening to a null check.
        if (startX && startY && endX && endY) {
          // calculate x and y direction difference
          const diffX = Big(endX).minus(startX).toNumber();
          const diffY = Big(endY).minus(startY).toNumber();

          // NOTE(review): the drag target is the centre of the start→end
          // rectangle, not the end point itself — confirm this is intended.
          await mouse.drag(
            straightTo(centerOf(new Region(startX, startY, diffX, diffY))),
          );
        }
      }
      break;
    }

    case 'type': {
      const rawContent = action_inputs.content;
      const content = rawContent?.trim();
      logger.info('[device] type', content);
      if (content) {
        const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
        // Decide on the trailing Enter from the untrimmed input: `trim()` has
        // already removed a real trailing newline from `content`, so checking
        // `content.endsWith('\n')` alone could never match.
        const pressEnter =
          content.endsWith('\\n') || /\n\s*$/.test(rawContent ?? '');

        keyboard.config.autoDelayMs = 0;
        try {
          if (env.isWindows) {
            // On Windows the text is pasted through the clipboard (Ctrl+V).
            const originalClipboard = clipboard.readText();
            clipboard.writeText(stripContent);
            try {
              await keyboard.pressKey(Key.LeftControl, Key.V);
              await keyboard.releaseKey(Key.LeftControl, Key.V);
              await sleep(500);
            } finally {
              // Always give the user's clipboard back, even if pasting failed.
              clipboard.writeText(originalClipboard);
            }
          } else {
            await keyboard.type(stripContent);
          }

          if (pressEnter) {
            await keyboard.pressKey(Key.Enter);
            await keyboard.releaseKey(Key.Enter);
          }
        } finally {
          // Restore the key delay even when a keyboard call throws.
          keyboard.config.autoDelayMs = 500;
        }
      }
      break;
    }

    case 'hotkey': {
      const keyStr = action_inputs?.key || action_inputs?.hotkey;
      if (keyStr) {
        const keyMap: Record<string, Key> = {
          return: Key.Enter,
          enter: Key.Enter,
          ctrl: Key.LeftControl,
          shift: Key.LeftShift,
          alt: Key.LeftAlt,
          space: Key.Space,
          'page down': Key.PageDown,
          pagedown: Key.PageDown,
          'page up': Key.PageUp,
          pageup: Key.PageUp,
        };

        // Join "page down" / "page up" before tokenising so the whitespace
        // split below does not tear them apart, then split on runs of
        // whitespace and '+' so "ctrl + c" does not produce empty tokens.
        const tokens = keyStr
          .replace(/page\s+(up|down)/gi, 'page$1')
          .split(/[\s+]+/)
          .filter(Boolean);

        const keys: Key[] = [];
        const unresolved: string[] = [];
        for (const token of tokens) {
          // `??` rather than `||` so an enum member whose value is 0 is kept.
          const resolved =
            keyMap[token.toLowerCase()] ?? Key[token as keyof typeof Key];
          if (resolved === undefined) {
            unresolved.push(token);
          } else {
            keys.push(resolved);
          }
        }

        logger.info('[hotkey]: ', keys);

        if (unresolved.length > 0 || keys.length === 0) {
          // Do not press a partial chord: skip the whole hotkey instead.
          logger.warn(
            `Unsupported hotkey: ${keyStr} (unresolved: ${unresolved.join(', ')})`,
          );
        } else {
          await keyboard.pressKey(...keys);
          await keyboard.releaseKey(...keys);
        }
      }
      break;
    }

    case 'scroll': {
      const { direction } = action_inputs;
      // if startX and startY is not null, move mouse to startX, startY
      if (startX !== null && startY !== null) {
        await moveStraightTo(startX, startY);
      }

      switch (direction?.toLowerCase()) {
        case 'up':
          await mouse.scrollUp(5 * 100);
          break;
        case 'down':
          await mouse.scrollDown(5 * 100);
          break;
        default:
          // Use the injected logger so custom loggers see this warning too.
          logger.warn(`Unsupported scroll direction: ${direction}`);
      }
      break;
    }

    case 'screenshot':
    case 'finished':
      break;

    default:
      logger.warn(`Unsupported action: ${action_type}`);
  }
};
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
> because nutjs uses pyautogui?
I don't think so. At least `nut.js`'s code doesn't indicate that. I think maybe he hasn't finished his PR yet?
CI is broken, can you fix it? |
Related to #9
Add Linux support, including Wayland and X11, to the project.
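Environment Detection:
- Add `isWayland` constant to check if the display server is Wayland in `src/main/env.ts`.
- Update `isLinux` constant to differentiate between Wayland and X11 in `src/main/env.ts`.
Device Handling:
- Import `isWayland` in `src/main/agent/device.ts`.
- Add condition to handle Wayland-specific functionalities using `@nut-tree/nut.js` in `src/main/agent/device.ts`.
- Update `screenshot` method to handle Wayland-specific functionalities in `src/main/agent/device.ts`.
Input Controls:
- Import `isWayland` in `src/main/agent/execute.ts`.
- Add condition to handle Wayland-specific input controls in `src/main/agent/execute.ts`.
Documentation:
- Add installation and usage instructions for Linux, including Wayland and X11, in `README.md`.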