5. 浏览器自动化系统
5.1 browser_use 集成
Manus 的浏览器自动化基于开源库 browser_use，底层使用 Playwright：
typescript
// Core browser_use interface.
// Contract for a browser-driving agent: navigation, interaction, information
// retrieval and advanced operations. Every method is asynchronous.
// NOTE(review): `Coordinates` and `DataSchema` are declared outside this snippet.
interface BrowserAgent {
// Navigation
/** Navigates to `url`. When the promise resolves (load vs. network idle) is implementation-defined — TODO confirm. */
goto(url: string): Promise<void>;
// Interaction
/** Clicks the element identified by a selector string, or a point given as `Coordinates`. */
click(selector: string | Coordinates): Promise<void>;
/** Types `text`. No target is passed, so presumably it goes to the currently focused element — verify against the implementation. */
type(text: string): Promise<void>;
/** Scrolls the page up or down. The unit of `amount` (pixels, lines, pages) is not specified here — TODO confirm. */
scroll(direction: "up" | "down", amount?: number): Promise<void>;
// Information retrieval
/** Captures a screenshot and resolves with the image bytes. */
screenshot(): Promise<Buffer>;
/** Resolves with the page content as a string (HTML vs. plain text is not specified here). */
getContent(): Promise<string>;
/** Extracts data described by `schema`; the result is untyped (`any`), so callers must validate it. */
extractData(schema: DataSchema): Promise<any>;
// Advanced operations
/** Executes `script` and resolves with its untyped result. NOTE(review): presumably runs in the page context — confirm. */
executeScript(script: string): Promise<any>;
/** Waits for an element matching `selector`; `timeout` is optional (unit presumably milliseconds — TODO confirm). */
waitForElement(selector: string, timeout?: number): Promise<void>;
}5.2 浏览器工具定义
typescript
// Declarative definitions of the browser tools. Each entry carries a `name`,
// a natural-language `description`, and a JSON-Schema-style `parameters`
// object (`type: "object"`, `properties`, and optionally `required`).
// NOTE(review): presumably handed to an LLM as function-calling tool specs;
// the consumer is not visible in this snippet.
// The multi-line descriptions are template literals: their exact text,
// including line breaks, is what the consumer receives.
const browserTools = [
// browser_navigate: open a URL. `url` is required.
{
name: "browser_navigate",
description: "Navigate to a URL. Use this to open websites.",
parameters: {
type: "object",
properties: {
url: {
type: "string",
description: "The URL to navigate to"
}
},
required: ["url"]
}
},
// browser_click: click an element. `target` is required and is always a
// string in this schema, even when it encodes coordinates (see its
// description: "coordinates JSON").
{
name: "browser_click",
description: `Click on an element on the page.
You can specify:
- A CSS selector: "#submit-btn", ".nav-link"
- Text content: "Submit", "Next"
- Coordinates: { x: 100, y: 200 }
Tips:
- Prefer semantic selectors over coordinates
- Wait for page to load before clicking
- Take screenshot after to verify action`,
parameters: {
type: "object",
properties: {
target: {
type: "string",
description: "CSS selector, text content, or coordinates JSON"
}
},
required: ["target"]
}
},
// browser_type: type text. `text` is required; `selector` is optional and
// names the input to target. The description lists bracketed special-key
// tokens such as [Enter]; how they are interpreted is up to the executor
// (not shown here).
{
name: "browser_type",
description: `Type text into the currently focused element or specified input.
Supports special keys:
- [Enter] - Press enter key
- [Tab] - Press tab key
- [Escape] - Press escape
- [Ctrl+a] - Select all
- [Ctrl+c] - Copy
- [Ctrl+v] - Paste`,
parameters: {
type: "object",
properties: {
text: { type: "string" },
selector: {
type: "string",
description: "Optional: target input element"
}
},
required: ["text"]
}
},
// browser_screenshot: capture the current page. There is no `required`
// list, so `fullPage` is optional.
{
name: "browser_screenshot",
description: `Capture a screenshot of the current page.
Always take screenshots:
- After navigation to see the page
- After clicking to verify the action
- When you need to analyze page content
- Before making decisions about next steps`,
parameters: {
type: "object",
properties: {
fullPage: {
type: "boolean",
description: "Capture full page vs viewport only"
}
}
}
},
// browser_extract: structured extraction. `schema` is required; `selector`
// is optional and scopes the extraction to part of the page.
{
name: "browser_extract",
description: "Extract structured data from the current page",
parameters: {
type: "object",
properties: {
schema: {
type: "object",
description: "JSON schema describing the data to extract"
},
selector: {
type: "string",
description: "Optional: CSS selector to scope extraction"
}
},
required: ["schema"]
}
}
];5.3 浏览器自动化实现
typescript
/**
 * Drives a single Playwright Chromium page: navigation, clicking,
 * screenshots and LLM-assisted data extraction.
 *
 * Lifecycle: construct, `await init()`, use, then `await close()`.
 * Apart from `navigate` (which reports failures in its result), methods
 * throw a descriptive error when called before `init()`.
 */
class BrowserController {
  /** Viewport applied in `init()`; also the fallback reported by `screenshot()`. */
  private static readonly DEFAULT_VIEWPORT = { width: 1920, height: 1080 };

  /** Upper bound on the page text embedded in the extraction prompt. */
  private static readonly MAX_PROMPT_CHARS = 10000;

  /**
   * Time budget for each locator strategy in `click()`. Without it, a
   * selector that matches nothing blocks for Playwright's whole default
   * action timeout before the next strategy gets a chance.
   */
  private static readonly CLICK_STRATEGY_TIMEOUT_MS = 5000;

  private browser?: Browser;
  private page?: Page;

  /**
   * @param llm Chat client used by `extractContent()`. Optional so that
   *   existing `new BrowserController()` call sites keep working; extraction
   *   throws a clear error when it is missing.
   */
  constructor(
    private readonly llm?: {
      chat(request: {
        messages: { role: string; content: string }[];
      }): Promise<{ content: string }>;
    },
  ) {}

  /**
   * Launches Chromium, opens a page, sets the viewport and installs the
   * anti-detection init script. A browser left over from an earlier
   * `init()` is closed first, and a partially initialised browser is
   * closed if any setup step fails.
   */
  async init(): Promise<void> {
    await this.close();

    const playwright = require('playwright');
    const browser: Browser = await playwright.chromium.launch({
      headless: false, // Original author's note: Manus runs a headed browser to make screenshots possible.
      args: ['--disable-blink-features=AutomationControlled'],
    });

    try {
      const page: Page = await browser.newPage();
      // Set the viewport.
      await page.setViewportSize({ ...BrowserController.DEFAULT_VIEWPORT });
      // Inject the anti-detection script: report `navigator.webdriver` as false.
      await page.addInitScript(() => {
        Object.defineProperty(navigator, 'webdriver', { get: () => false });
      });
      this.browser = browser;
      this.page = page;
    } catch (error: unknown) {
      // Do not leak the browser process; the setup error is the one to surface.
      await browser.close().catch(() => undefined);
      throw error;
    }
  }

  /** Closes the browser, if one is open. Safe to call repeatedly. */
  async close(): Promise<void> {
    const browser = this.browser;
    this.browser = undefined;
    this.page = undefined;
    if (browser) {
      await browser.close();
    }
  }

  /**
   * Navigates to `url`, waiting for network idle for up to 30 seconds.
   *
   * @returns `{ success: true, url, title }` on success, otherwise
   *   `{ success: false, error }`. Never rejects: every failure, including
   *   "not initialised", is reported through the result.
   */
  async navigate(url: string): Promise<NavigationResult> {
    try {
      const page = this.requirePage();
      await page.goto(url, {
        waitUntil: 'networkidle',
        timeout: 30000,
      });
      return {
        success: true,
        url: page.url(),
        title: await page.title(),
      };
    } catch (error: unknown) {
      // Anything can be thrown, so narrow before reading `.message`.
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /**
   * Clicks a target.
   *
   * @param target Either a point `{ x, y }`, a JSON string encoding such a
   *   point (the form the `browser_click` tool passes), or a string tried in
   *   order as: CSS selector, text content, button name, link name.
   * @throws Error `Could not find clickable element: <target>` when every
   *   strategy fails; the last underlying error is attached as `cause`.
   */
  async click(target: string | { x: number; y: number }): Promise<void> {
    const page = this.requirePage();

    if (typeof target !== 'string') {
      // Coordinate click.
      await page.mouse.click(target.x, target.y);
      return;
    }

    const point = BrowserController.parseCoordinates(target);
    if (point) {
      await page.mouse.click(point.x, point.y);
      return;
    }

    // Try several locator strategies, each with a bounded wait.
    const timeout = BrowserController.CLICK_STRATEGY_TIMEOUT_MS;
    const strategies: Array<() => Promise<void>> = [
      () => page.click(target, { timeout }), // CSS selector
      () => page.click(`text=${target}`, { timeout }), // Text content
      () => page.getByRole('button', { name: target }).click({ timeout }),
      () => page.getByRole('link', { name: target }).click({ timeout }),
    ];

    let lastError: unknown;
    for (const strategy of strategies) {
      try {
        await strategy();
        return;
      } catch (error: unknown) {
        lastError = error;
      }
    }

    throw Object.assign(
      new Error(`Could not find clickable element: ${target}`),
      { cause: lastError },
    );
  }

  /**
   * Captures a PNG screenshot.
   *
   * @param fullPage Capture the whole scrollable page instead of only the
   *   viewport. Defaults to `false`, the previous fixed behaviour.
   * @returns Base64 image, ISO timestamp, current URL and the page's
   *   viewport size (the configured default if the page reports none).
   */
  async screenshot(fullPage = false): Promise<ScreenshotResult> {
    const page = this.requirePage();
    const buffer = await page.screenshot({
      type: 'png',
      fullPage,
    });
    return {
      image: buffer.toString('base64'),
      timestamp: new Date().toISOString(),
      url: page.url(),
      viewport: page.viewportSize() ?? { ...BrowserController.DEFAULT_VIEWPORT },
    };
  }

  /**
   * Asks the LLM to extract data matching `schema` from the page's visible
   * text, truncated to `MAX_PROMPT_CHARS` characters.
   *
   * @returns The parsed JSON reply. It is NOT validated against `schema`;
   *   the return type stays `any` for backward compatibility, so callers
   *   should validate before trusting it.
   * @throws Error when no LLM client was supplied or `init()` was not called.
   * @throws SyntaxError when the reply is not valid JSON.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- public return type kept for existing callers
  async extractContent(schema: DataSchema): Promise<any> {
    const page = this.requirePage();
    if (!this.llm) {
      throw new Error(
        'BrowserController has no LLM client; pass one to the constructor',
      );
    }

    // Fetch the page content as visible text.
    const text = await page.innerText('body');

    // Use the LLM to extract structured data.
    const prompt = [
      'Extract data from this webpage according to the schema.',
      `Page URL: ${page.url()}`,
      'Page Content:',
      text.slice(0, BrowserController.MAX_PROMPT_CHARS),
      'Schema:',
      JSON.stringify(schema, null, 2),
      'Output valid JSON matching the schema.',
    ].join('\n');

    const response = await this.llm.chat({
      messages: [{ role: 'user', content: prompt }],
    });
    return BrowserController.parseJsonReply(response.content);
  }

  /** Returns the active page, or throws if `init()` has not completed. */
  private requirePage(): Page {
    if (!this.page) {
      throw new Error('BrowserController is not initialized; call init() first');
    }
    return this.page;
  }

  /** Parses a string such as `{"x":100,"y":200}` into a point; returns `undefined` for anything else. */
  private static parseCoordinates(
    target: string,
  ): { x: number; y: number } | undefined {
    const trimmed = target.trim();
    if (!trimmed.startsWith('{')) {
      return undefined;
    }
    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      return undefined; // Not JSON: treat the string as a selector or text.
    }
    if (typeof parsed !== 'object' || parsed === null) {
      return undefined;
    }
    const { x, y } = parsed as { x?: unknown; y?: unknown };
    return typeof x === 'number' && typeof y === 'number' ? { x, y } : undefined;
  }

  /** Parses an LLM reply as JSON, tolerating a surrounding Markdown code fence. */
  private static parseJsonReply(raw: string): unknown {
    const fenced = /^\s*```(?:json)?\s*([\s\S]*?)\s*```\s*$/i.exec(raw);
    const candidate = (fenced?.[1] ?? raw).trim();
    try {
      return JSON.parse(candidate);
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`LLM response is not valid JSON: ${reason}`);
    }
  }
}