成本优化与安全护栏
Token 使用分析、优化策略、输入输出过滤、行为边界
6. 成本优化
6.1 Token 使用分析
javascript
/**
 * Records token usage for each LLM request and aggregates the records into
 * cost reports.
 */
class CostAnalyzer {
  constructor() {
    // One entry per recorded request: { timestamp, model, inputTokens, outputTokens, cost }.
    this.records = [];
  }

  /**
   * Store the usage of one completed request.
   *
   * @param {{model: string}} request - The request that was sent.
   * @param {{usage: {prompt_tokens: number, completion_tokens: number}}} response
   *   The response carrying the token usage.
   */
  record(request, response) {
    const { usage } = response;
    this.records.push({
      timestamp: Date.now(),
      model: request.model,
      inputTokens: usage.prompt_tokens,
      outputTokens: usage.completion_tokens,
      cost: this.calculateCost(request.model, usage),
    });
  }

  /**
   * Compute the cost of one request from the per-1000-token price table.
   *
   * @param {string} model - Model identifier.
   * @param {{prompt_tokens: number, completion_tokens: number}} usage
   * @returns {number} Cost in the price table's currency unit; 0 for models
   *   that are not in the table.
   */
  calculateCost(model, usage) {
    // Prices are per 1000 tokens.
    const pricing = {
      'claude-sonnet-4-20250514': { input: 0.003, output: 0.015 },
      'claude-3-5-haiku-20241022': { input: 0.0008, output: 0.004 },
      'gpt-4o': { input: 0.005, output: 0.015 },
      'gpt-4o-mini': { input: 0.00015, output: 0.0006 },
    };
    // Own-property check so names like "constructor" do not resolve to
    // inherited Object.prototype members and yield NaN.
    const price = Object.prototype.hasOwnProperty.call(pricing, model)
      ? pricing[model]
      : { input: 0, output: 0 };
    return (
      (usage.prompt_tokens / 1000) * price.input +
      (usage.completion_tokens / 1000) * price.output
    );
  }

  /**
   * Convert a period such as '24h', '30m', '7d' into milliseconds.
   *
   * @param {string|number} period - `<number><unit>` with unit one of
   *   ms, s, m, h, d, w; a non-negative finite number is taken as milliseconds.
   * @returns {number} Duration in milliseconds.
   * @throws {Error} If the period cannot be parsed.
   */
  parsePeriod(period) {
    if (typeof period === 'number' && Number.isFinite(period) && period >= 0) {
      return period;
    }
    const unitMs = { ms: 1, s: 1000, m: 60000, h: 3600000, d: 86400000, w: 604800000 };
    const match = /^(\d+(?:\.\d+)?)\s*(ms|s|m|h|d|w)$/i.exec(String(period).trim());
    if (!match) {
      throw new Error(`Invalid period: ${period}`);
    }
    return Number(match[1]) * unitMs[match[2].toLowerCase()];
  }

  /**
   * Aggregate records per model.
   *
   * @param {Array<object>} records - Entries in the shape stored by `record`.
   * @returns {Object<string, {requestCount: number, inputTokens: number, outputTokens: number, cost: number}>}
   */
  groupByModel(records) {
    const groups = new Map();
    for (const entry of records) {
      const group = groups.get(entry.model) ||
        { requestCount: 0, inputTokens: 0, outputTokens: 0, cost: 0 };
      group.requestCount += 1;
      group.inputTokens += entry.inputTokens;
      group.outputTokens += entry.outputTokens;
      group.cost += entry.cost;
      groups.set(entry.model, group);
    }
    return Object.fromEntries(groups);
  }

  /**
   * Summarize the records that fall inside the trailing time window.
   *
   * @param {string|number} [period='24h'] - Window length, see `parsePeriod`.
   * @returns {{totalCost: number, totalInputTokens: number, totalOutputTokens: number,
   *   requestCount: number, avgCostPerRequest: number, byModel: object}}
   */
  getReport(period = '24h') {
    const since = Date.now() - this.parsePeriod(period);
    const filtered = this.records.filter((entry) => entry.timestamp >= since);
    const total = (field) => filtered.reduce((acc, entry) => acc + entry[field], 0);
    const totalCost = total('cost');
    return {
      totalCost,
      totalInputTokens: total('inputTokens'),
      totalOutputTokens: total('outputTokens'),
      requestCount: filtered.length,
      // An empty window reports 0 rather than NaN from 0 / 0.
      avgCostPerRequest: filtered.length === 0 ? 0 : totalCost / filtered.length,
      byModel: this.groupByModel(filtered),
    };
  }
}
// 6.2 优化策略
javascript
/**
 * Cost-reduction helpers: model routing, prompt compression and result caching.
 */
class CostOptimizer {
  constructor() {
    // Settled results, keyed by the caller-supplied cache key.
    this.cache = new Map();
    // Calls still in flight, so concurrent callers for one key share a single invocation.
    this.pending = new Map();
  }

  // Strategy 1: model routing
  /**
   * Pick a model for a task based on its complexity tier.
   *
   * @param {*} task - Passed to `this.assessComplexity` when complexity is 'auto'.
   * @param {string} complexity - 'simple', 'medium', 'complex' or 'auto'.
   * @returns {Promise<string>} Model identifier; unknown tiers fall back to the medium model.
   * @throws {TypeError} If complexity is 'auto' and no `assessComplexity` method is available.
   */
  async routeToModel(task, complexity) {
    const modelTiers = {
      simple: 'gpt-4o-mini', // simple tasks
      medium: 'claude-3-5-haiku-20241022', // medium tasks
      complex: 'claude-sonnet-4-20250514', // complex tasks
    };
    let tier = complexity;
    // Determine the complexity automatically.
    if (tier === 'auto') {
      if (typeof this.assessComplexity !== 'function') {
        throw new TypeError("assessComplexity(task) must be implemented to use complexity 'auto'");
      }
      tier = await this.assessComplexity(task);
    }
    // Own-property check: a tier such as "toString" must not resolve to an
    // inherited Object.prototype member.
    return Object.prototype.hasOwnProperty.call(modelTiers, tier)
      ? modelTiers[tier]
      : modelTiers.medium;
  }

  // Strategy 2: prompt compression
  /**
   * Return the prompt unchanged when it fits the budget; otherwise ask a small
   * model to compress it.
   *
   * @param {string} prompt
   * @param {number} maxTokens - Target token budget.
   * @returns {Promise<string>} The original prompt, or the `content` of the
   *   compression response.
   */
  async compressPrompt(prompt, maxTokens) {
    const currentTokens = estimateTokens(prompt);
    if (currentTokens <= maxTokens) {
      return prompt;
    }
    // Compress with a small model.
    const compressed = await llm.chat({
      model: 'gpt-4o-mini',
      messages: [{
        role: 'user',
        content: `压缩以下内容,保留关键信息,目标 ${maxTokens} tokens:\n\n${prompt}`
      }]
    });
    return compressed.content;
  }

  // Strategy 3: caching
  /**
   * Return the cached result for `key`, computing it with `fn` on a miss.
   * Concurrent calls for the same key share one invocation of `fn`; a failed
   * invocation is not cached, so a later call retries.
   *
   * @param {*} key - Cache key.
   * @param {function(): (Promise<*>|*)} fn - Producer of the value.
   * @returns {Promise<*>} The cached or freshly computed value.
   */
  async cachedCall(key, fn) {
    if (this.cache.has(key)) {
      return this.cache.get(key);
    }
    if (this.pending.has(key)) {
      return this.pending.get(key);
    }
    // fn() is invoked before anything is registered, so a synchronous throw
    // rejects this call without leaving a stale pending entry behind.
    const inFlight = Promise.resolve(fn())
      .then((result) => {
        this.cache.set(key, result);
        return result;
      })
      .finally(() => {
        this.pending.delete(key);
      });
    this.pending.set(key, inFlight);
    return inFlight;
  }
}
// 7. 安全护栏
7.1 输入过滤
javascript
/**
 * Screens user input before it reaches the model: rejects over-long input and
 * text matching known prompt-injection phrasings, and strips code blocks and
 * script tags.
 */
class InputGuard {
  /**
   * @param {{maxLength?: number}} [options]
   * @param {number} [options.maxLength=50000] - Maximum accepted input length
   *   in UTF-16 code units.
   */
  constructor({ maxLength = 50000 } = {}) {
    this.maxLength = maxLength;
    this.blockedPatterns = [
      /ignore.*instructions/i,
      /forget.*rules/i,
      /pretend.*you.*are/i,
      /system.*prompt/i
    ];
  }

  /**
   * Decide whether the input may be forwarded.
   *
   * @param {string} input
   * @returns {{safe: boolean, reason?: string}} `reason` is present only when
   *   `safe` is false.
   */
  check(input) {
    if (typeof input !== 'string') {
      return {
        safe: false,
        reason: 'Input must be a string'
      };
    }
    // Length is checked first so the backtracking patterns below never scan
    // an over-long input.
    if (input.length > this.maxLength) {
      return {
        safe: false,
        reason: 'Input too long'
      };
    }
    // Look for injection attempts.
    for (const pattern of this.blockedPatterns) {
      // Patterns added later may carry the g/y flag, which makes test() stateful.
      pattern.lastIndex = 0;
      if (pattern.test(input)) {
        return {
          safe: false,
          reason: 'Potential prompt injection detected'
        };
      }
    }
    return { safe: true };
  }

  /**
   * Replace fenced code blocks with a placeholder and remove script elements.
   *
   * @param {string} input
   * @returns {string} The trimmed, sanitized text.
   * @throws {TypeError} If `input` is not a string.
   */
  sanitize(input) {
    if (typeof input !== 'string') {
      throw new TypeError('Input must be a string');
    }
    let current = input;
    let previous;
    // Repeat until stable: removing one element can splice the surrounding
    // text into a new "<script>" tag.
    do {
      previous = current;
      current = current
        .replace(/```[\s\S]*?```/g, '[CODE_BLOCK]') // drop code blocks
        // The closing tag may carry whitespace or attributes ("</script >").
        .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, ''); // drop scripts
    } while (current !== previous);
    return current.trim();
  }
}
// 7.2 输出过滤
javascript
/**
 * Scans model output for credentials and email addresses and can redact them.
 */
class OutputGuard {
  constructor() {
    // All patterns are global so match() returns every hit and replace()
    // redacts every occurrence.
    this.sensitivePatterns = [
      /api[_-]?key\s*[:=]\s*['"][^'"]+['"]/gi,
      /password\s*[:=]\s*['"][^'"]+['"]/gi,
      /secret\s*[:=]\s*['"][^'"]+['"]/gi,
      // Email address. The TLD class is [A-Za-z]; a "|" inside a character
      // class is a literal pipe, not alternation.
      /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g
    ];
  }

  /**
   * Report sensitive data found in the output.
   *
   * @param {string} output
   * @returns {{safe: boolean, issues: Array<{type: string, matches: string[]}>}}
   *   One issue per pattern that matched. Each entry in `matches` is only a
   *   short prefix of the hit followed by '...', so quoted credential values
   *   are not echoed into the report.
   */
  check(output) {
    const previewLength = 4;
    const issues = [];
    for (const pattern of this.sensitivePatterns) {
      const matches = output.match(pattern);
      if (matches) {
        issues.push({
          type: 'sensitive_data',
          matches: matches.map((hit) => `${hit.slice(0, previewLength)}...`)
        });
      }
    }
    return {
      safe: issues.length === 0,
      issues
    };
  }

  /**
   * Replace every sensitive match with the literal '[REDACTED]'.
   *
   * @param {string} output
   * @returns {string} The redacted text.
   */
  redact(output) {
    return this.sensitivePatterns.reduce(
      (text, pattern) => text.replace(pattern, '[REDACTED]'),
      output
    );
  }
}
// 7.3 行为边界
javascript
/**
 * Enforces per-user request rate limits and basic request parameter bounds.
 */
class BehaviorGuard {
  /**
   * @param {{maxTokensPerRequest?: number, maxRequestsPerMinute?: number}} [config]
   *   Falsy values (including 0) fall back to the defaults 4096 and 60.
   */
  constructor(config = {}) {
    this.maxTokensPerRequest = config.maxTokensPerRequest || 4096;
    this.maxRequestsPerMinute = config.maxRequestsPerMinute || 60;
    // Request counts for the current minute window, keyed by `${userId}:${minute}`.
    this.requestCounts = new Map();
    // Minute index the counts in requestCounts belong to.
    this.currentWindow = null;
  }

  /**
   * Count one request for the user in the current fixed one-minute window.
   *
   * @param {string} userId
   * @returns {Promise<boolean>} Resolves to true while the user is within the limit.
   * @throws {Error} 'Rate limit exceeded' (as a rejection) once the user's
   *   count for the window exceeds `maxRequestsPerMinute`.
   */
  async checkRateLimit(userId) {
    const minute = Math.floor(Date.now() / 60000);
    // Only the current window is ever kept, so stale counts are dropped in one
    // step when the window changes instead of rescanning every key per call.
    if (minute !== this.currentWindow) {
      this.requestCounts.clear();
      this.currentWindow = minute;
    }
    const key = `${userId}:${minute}`;
    const count = (this.requestCounts.get(key) || 0) + 1;
    this.requestCounts.set(key, count);
    if (count > this.maxRequestsPerMinute) {
      throw new Error('Rate limit exceeded');
    }
    return true;
  }

  /**
   * Validate the size and sampling temperature of a request.
   *
   * @param {{messages: *, temperature?: number}} request
   * @returns {{valid: boolean, errors: string[]}}
   */
  validateRequest(request) {
    const errors = [];
    if (estimateTokens(request.messages) > this.maxTokensPerRequest) {
      errors.push('Request exceeds max tokens');
    }
    // An absent temperature compares false and is accepted.
    if (request.temperature > 1.5) {
      errors.push('Temperature too high');
    }
    return {
      valid: errors.length === 0,
      errors
    };
  }
}