构建测试套件
Golden Dataset、回归测试
4.1 Golden Dataset (黄金数据集)
javascript
// test-cases.json
/**
 * Golden dataset: the fixed set of prompts used to evaluate generated output.
 *
 * Fields of each case:
 * - id:                identifier of the case.
 * - category:          free-form grouping label.
 * - input:             text sent as the user message.
 * - expectedBehaviors: descriptions that must all be judged present in the output.
 * - forbiddenPatterns: literal substrings that must not occur in the output.
 * - minQualityScore:   lowest quality score that still counts as a pass.
 */
const goldenDataset = [
  // Component generation: login form.
  {
    id: 'login-form-1',
    category: 'component',
    input: '生成一个 React 登录表单,使用 React Hook Form 和 Zod 验证',
    expectedBehaviors: [
      '使用 useForm hook',
      '使用 zodResolver',
      '包含 email 和 password 字段',
      '有提交按钮',
      '显示验证错误',
    ],
    forbiddenPatterns: ['any', 'document.getElementById'],
    minQualityScore: 4,
  },
  // Hook generation: data fetching.
  {
    id: 'fetch-hook-1',
    category: 'hook',
    input: '创建一个 useFetch hook,支持加载状态和错误处理',
    expectedBehaviors: [
      '返回 data, loading, error',
      '使用 useEffect',
      '支持取消请求',
      '处理竞态条件',
    ],
    forbiddenPatterns: [],
    minQualityScore: 4,
  },
];
/**
 * Runs the golden dataset against a system prompt / model pair and grades
 * every generated answer.
 *
 * NOTE(review): `checkBehaviorWithLLM(content, behavior)` and
 * `getQualityScore(input, content)` are called on `this` but are not defined
 * in this class as shown, and `llm` / `goldenDataset` are taken from the
 * enclosing scope — confirm they are provided before running.
 */
class TestRunner {
  /**
   * Runs every case of `goldenDataset`, one after another, and aggregates
   * the outcome.
   *
   * @param {string} prompt - System prompt under test.
   * @param {string} model - Model identifier forwarded to `llm.chat`.
   * @returns {Promise<{total: number, passed: number, failed: number,
   *   passRate: number, details: object[]}>} Aggregate counts plus the
   *   per-case results in dataset order.
   */
  async runAll(prompt, model) {
    const results = [];
    for (const testCase of goldenDataset) {
      results.push(await this.runTestCase(testCase, prompt, model));
    }

    const total = results.length;
    const passed = results.filter((r) => r.passed).length;

    return {
      total,
      passed,
      failed: total - passed,
      // An empty dataset would otherwise give 0 / 0 = NaN.
      passRate: total === 0 ? 0 : passed / total,
      details: results,
    };
  }

  /**
   * Generates an answer for one test case and grades it.
   *
   * A case passes when every expected behavior is present, no forbidden
   * pattern occurs in the output, and the quality score reaches
   * `testCase.minQualityScore`.
   *
   * @param {object} testCase - One entry of the golden dataset.
   * @param {string} systemPrompt - System prompt under test.
   * @param {string} model - Model identifier forwarded to `llm.chat`.
   * @returns {Promise<{id: string, passed: boolean, behaviorChecks: object[],
   *   forbiddenChecks: object[], qualityScore: number, output: string}>}
   */
  async runTestCase(testCase, systemPrompt, model) {
    const previewLength = 500;

    // Generate the output.
    const output = await llm.chat({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: testCase.input },
      ],
    });
    const { content } = output;
    const loweredContent = content.toLowerCase();

    // Check expected behaviors. The callback must be async because of the
    // `await`; a case-insensitive literal hit short-circuits, so the LLM
    // judge is only consulted for behaviors not found verbatim.
    const behaviorChecks = await Promise.all(
      testCase.expectedBehaviors.map(async (behavior) => ({
        behavior,
        present:
          loweredContent.includes(behavior.toLowerCase()) ||
          (await this.checkBehaviorWithLLM(content, behavior)),
      })),
    );

    // Check forbidden patterns (case-sensitive substring match).
    const forbiddenChecks = testCase.forbiddenPatterns.map((pattern) => ({
      pattern,
      found: content.includes(pattern),
    }));

    // Quality score.
    const qualityScore = await this.getQualityScore(testCase.input, content);

    const passed =
      behaviorChecks.every((c) => c.present) &&
      forbiddenChecks.every((c) => !c.found) &&
      qualityScore >= testCase.minQualityScore;

    return {
      id: testCase.id,
      passed,
      behaviorChecks,
      forbiddenChecks,
      qualityScore,
      // Only mark the preview with an ellipsis when text was actually cut.
      output:
        content.length > previewLength
          ? `${content.slice(0, previewLength)}...`
          : content,
    };
  }
}

// 4.2 回归测试 (Regression Testing)
javascript
/**
 * Diffs a new test run against a stored baseline run to surface cases whose
 * pass/fail status flipped.
 */
class RegressionTester {
  /**
   * @param {object} baselineResults - Earlier run to compare against; its
   *   `details` and `passRate` properties are read by `compare`.
   */
  constructor(baselineResults) {
    this.baseline = baselineResults;
  }

  /**
   * Matches cases by `id` and classifies status flips.
   *
   * Cases that have no baseline entry with the same id are ignored.
   *
   * @param {object} newResults - Latest run (`details` and `passRate` are read).
   * @returns {Promise<{hasRegressions: boolean, regressions: object[],
   *   improvements: object[], summary: {oldPassRate: number,
   *   newPassRate: number, delta: number}}>}
   */
  async compare(newResults) {
    const regressions = [];
    const improvements = [];

    for (const current of newResults.details) {
      const previous = this.baseline.details.find(
        (candidate) => candidate.id === current.id,
      );
      if (!previous) {
        continue;
      }

      // Both kinds of entry share the same shape apart from `type`.
      const describeChange = (type) => ({
        testId: current.id,
        type,
        oldScore: previous.qualityScore,
        newScore: current.qualityScore,
      });

      // Regression: passed before, fails now.
      if (previous.passed && !current.passed) {
        regressions.push(describeChange('regression'));
      }

      // Improvement: failed before, passes now.
      if (!previous.passed && current.passed) {
        improvements.push(describeChange('improvement'));
      }
    }

    const oldPassRate = this.baseline.passRate;
    const newPassRate = newResults.passRate;

    return {
      hasRegressions: regressions.length > 0,
      regressions,
      improvements,
      summary: {
        oldPassRate,
        newPassRate,
        delta: newPassRate - oldPassRate,
      },
    };
  }
}