RAG 与记忆系统:让 AI 拥有知识和记忆 📚
"LLM 的知识是静态的,RAG 让它动态获取信息。"
1. RAG 基础
RAG (Retrieval-Augmented Generation) 让 LLM 访问外部知识。
传统 LLM: 问题 → LLM → 回答
RAG: 问题 → 检索文档 → LLM (问题 + 文档) → 回答
工作流: 文档 → 分块 → 嵌入 → 向量数据库 → 查询检索 → LLM 生成
2. 文档分块策略
2.1 基础分块
javascript
// Split text into fixed-size character chunks with overlapping windows.
// The overlap keeps content that straddles a boundary retrievable from
// either neighboring chunk.
//
// @param {string} text - input to split
// @param {number} chunkSize - maximum characters per chunk (default 1000)
// @param {number} overlap - characters shared between adjacent chunks (default 200)
// @returns {string[]} ordered chunks covering the whole input
// @throws {RangeError} when overlap >= chunkSize (the window could never advance)
function chunkBySize(text, chunkSize = 1000, overlap = 200) {
  if (overlap >= chunkSize) {
    throw new RangeError('overlap must be smaller than chunkSize');
  }
  const chunks = [];
  let start = 0;
  while (start < text.length) {
    const end = Math.min(start + chunkSize, text.length);
    chunks.push(text.slice(start, end));
    // Bug fix: once the final chunk is emitted we must stop. The original
    // kept executing `start = end - overlap`, which stepped backwards at the
    // end of the text and looped forever (or re-emitted the tail).
    if (end === text.length) break;
    start = end - overlap;
  }
  return chunks;
}
// 按段落分块
function chunkByParagraph(text, maxSize = 1000) {
const paragraphs = text.split(/\n\n+/);
const chunks = [];
let current = '';
for (const para of paragraphs) {
if ((current + para).length > maxSize && current) {
chunks.push(current.trim());
current = para;
} else {
current += '\n\n' + para;
}
}
if (current.trim()) chunks.push(current.trim());
return chunks;
}2.2 语义分块
javascript
async function semanticChunking(text) {
const response = await llm.chat({
messages: [{
role: 'user',
content: `将文本分割成语义完整的段落,用 "---CHUNK---" 分隔:\n${text}`
}]
});
return response.content.split('---CHUNK---').map(c => c.trim());
}2.3 代码分块
javascript
function chunkCode(code, language) {
const ast = parseAST(code, language);
return ast.body
.filter(node => ['FunctionDeclaration', 'ClassDeclaration'].includes(node.type))
.map(node => ({
type: node.type,
name: node.id?.name || 'anonymous',
content: code.slice(node.start, node.end)
}));
}3. 向量嵌入
javascript
import OpenAI from 'openai';
const openai = new OpenAI();
/**
 * Embed a single text with OpenAI's `text-embedding-3-small` model
 * (1536-dimensional vectors).
 * NOTE(review): uses the file-level `openai` client instance.
 *
 * @param {string} text - text to embed
 * @returns {Promise<number[]>} the embedding vector
 */
async function getEmbedding(text) {
  const { data } = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: text
  });
  return data[0].embedding;
}
// 批量嵌入
async function getEmbeddings(texts) {
const response = await openai.embeddings.create({
model: 'text-embedding-3-small',
input: texts
});
return response.data.map(d => d.embedding);
}模型选择:
- text-embedding-3-small(1536维): 快速、便宜
- text-embedding-3-large(3072维): 更精确
- Cohere embed-v3(1024维): 多语言优秀
- BGE-M3(1024维): 开源、本地部署
4. 向量数据库
4.1 Chroma 示例
javascript
// Chroma quick-start: create a cosine-similarity collection, insert two
// pre-embedded documents, then run a metadata-filtered kNN query.
// NOTE(review): `embedding1`, `embedding2` and `queryEmbedding` are not
// defined in this snippet — presumably produced by getEmbedding() above;
// confirm before running.
import { ChromaClient } from 'chromadb';
const client = new ChromaClient();
const collection = await client.createCollection({
name: 'codebase',
// Use cosine distance for the HNSW index.
metadata: { 'hnsw:space': 'cosine' }
});
// Add documents: ids, embeddings, raw text and metadata are parallel arrays.
await collection.add({
ids: ['doc1', 'doc2'],
embeddings: [embedding1, embedding2],
documents: ['内容1', '内容2'],
metadatas: [
{ source: 'src/App.tsx', type: 'code' },
{ source: 'README.md', type: 'doc' }
]
});
// Query: top-5 nearest neighbours, restricted to code-type documents.
const results = await collection.query({
queryEmbeddings: [queryEmbedding],
nResults: 5,
where: { type: 'code' }
});4.2 PostgreSQL + pgvector
sql
-- Enable the pgvector extension (provides the `vector` type and operators).
CREATE EXTENSION vector;
CREATE TABLE documents (
id SERIAL PRIMARY KEY,
content TEXT,
-- 1536 dimensions matches OpenAI text-embedding-3-small.
embedding vector(1536),
metadata JSONB
);
-- Approximate-nearest-neighbour index (IVFFlat) over cosine distance.
CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops);
-- Retrieve the most similar documents: `<=>` is cosine distance,
-- so `1 - distance` is cosine similarity.
SELECT id, content, 1 - (embedding <=> $1) AS similarity
FROM documents
ORDER BY embedding <=> $1
LIMIT 5;5. 检索策略
5.1 混合检索
javascript
async function hybridSearch(query, k = 5) {
const vectorResults = await vectorSearch(query, k * 2);
const keywordResults = await bm25Search(query, k * 2);
// RRF 融合排序
const scores = new Map();
vectorResults.forEach((doc, rank) => {
scores.set(doc.id, (scores.get(doc.id) || 0) + 1 / (60 + rank));
});
keywordResults.forEach((doc, rank) => {
scores.set(doc.id, (scores.get(doc.id) || 0) + 1 / (60 + rank));
});
return [...scores.entries()]
.sort((a, b) => b[1] - a[1])
.slice(0, k)
.map(([id]) => getDocument(id));
}5.2 重排序
javascript
async function retrieveWithRerank(query, k = 5) {
const candidates = await retrieve(query, k * 3);
const response = await cohere.rerank({
model: 'rerank-english-v3.0',
query,
documents: candidates.map(d => d.content)
});
return response.results
.sort((a, b) => b.relevance_score - a.relevance_score)
.slice(0, k)
.map(r => candidates[r.index]);
}5.3 查询扩展
javascript
/**
 * Query expansion: ask the LLM for 3 semantically similar questions so
 * retrieval can match documents phrased differently from the user's wording.
 * The original query is always the first element of the result.
 * NOTE(review): uses the file-level `llm` chat client.
 *
 * @param {string} query - original user query
 * @returns {Promise<string[]>} original query plus generated variants
 */
async function expandQuery(query) {
  const response = await llm.chat({
    messages: [{
      role: 'user',
      content: `生成 3 个与以下查询语义相似的问题:\n"${query}"\n每行一个。`
    }]
  });
  const variants = response.content.split('\n').filter(Boolean);
  return [query, ...variants];
}
async function retrieveWithExpansion(query, k = 5) {
const queries = await expandQuery(query);
const allResults = await Promise.all(queries.map(q => retrieve(q, k)));
const seen = new Set();
const merged = [];
for (const results of allResults) {
for (const doc of results) {
if (!seen.has(doc.id)) {
seen.add(doc.id);
merged.push(doc);
}
}
}
return merged.slice(0, k);
}6. RAG 应用
6.1 完整 RAG 系统
javascript
class RAGSystem {
constructor(collection, llm) {
this.collection = collection;
this.llm = llm;
}
async query(question) {
const docs = await this.retrieve(question);
const context = docs.map(d => d.content).join('\n\n---\n\n');
const response = await this.llm.chat({
messages: [
{
role: 'system',
content: `根据上下文回答问题。如果上下文中没有相关信息,说"我没有找到相关信息"。\n\n上下文:\n${context}`
},
{ role: 'user', content: question }
]
});
return {
answer: response.content,
sources: docs.map(d => d.metadata.source)
};
}
async retrieve(question, k = 5) {
const embedding = await getEmbedding(question);
const results = await this.collection.query({
queryEmbeddings: [embedding],
nResults: k
});
return results.documents[0].map((content, i) => ({
content,
metadata: results.metadatas[0][i]
}));
}
}6.2 代码库 RAG
javascript
class CodebaseRAG {
async indexRepository(repoPath) {
const files = await glob(`${repoPath}/**/*.{ts,tsx,js,jsx}`);
for (const file of files) {
const content = await fs.readFile(file, 'utf-8');
const chunks = chunkCode(content, getLanguage(file));
for (const chunk of chunks) {
await this.collection.add({
ids: [`${file}:${chunk.name}`],
documents: [chunk.content],
metadatas: [{ file, type: chunk.type, name: chunk.name }]
});
}
}
}
async findRelatedCode(question) {
const results = await this.retrieve(question, 10);
const filtered = await this.llm.chat({
messages: [{
role: 'user',
content: `用户问题: ${question}\n\n选择最相关的 3-5 个代码片段:\n${results.map((r, i) => `[${i}] ${r.metadata.file}\n${r.content}`).join('\n\n')}\n\n输出编号,逗号分隔。`
}]
});
const indices = filtered.content.split(',').map(Number);
return indices.map(i => results[i]);
}
}7. 记忆系统
7.1 对话记忆
javascript
class ConversationMemory {
constructor(maxMessages = 20) {
this.messages = [];
this.maxMessages = maxMessages;
}
add(role, content) {
this.messages.push({ role, content, timestamp: Date.now() });
if (this.messages.length > this.maxMessages) {
const systemMessages = this.messages.filter(m => m.role === 'system');
const recentMessages = this.messages
.filter(m => m.role !== 'system')
.slice(-this.maxMessages + systemMessages.length);
this.messages = [...systemMessages, ...recentMessages];
}
}
getContext() {
return this.messages.map(({ role, content }) => ({ role, content }));
}
async summarize() {
if (this.messages.length < 10) return;
const oldMessages = this.messages.slice(0, -5);
const summary = await llm.chat({
messages: [{
role: 'user',
content: `总结对话关键信息:\n${oldMessages.map(m => `${m.role}: ${m.content}`).join('\n')}`
}]
});
this.messages = [
{ role: 'system', content: `对话历史摘要: ${summary.content}` },
...this.messages.slice(-5)
];
}
}7.2 长期记忆
javascript
class LongTermMemory {
constructor(vectorDB) {
this.collection = vectorDB.collection('memories');
}
async store(content, metadata = {}) {
const id = crypto.randomUUID();
const embedding = await getEmbedding(content);
await this.collection.add({
ids: [id],
embeddings: [embedding],
documents: [content],
metadatas: [{ ...metadata, timestamp: Date.now() }]
});
return id;
}
async recall(query, k = 5) {
const embedding = await getEmbedding(query);
const results = await this.collection.query({
queryEmbeddings: [embedding],
nResults: k
});
return results.documents[0].map((content, i) => ({
content,
metadata: results.metadatas[0][i]
}));
}
async processConversation(messages) {
const response = await llm.chat({
messages: [{
role: 'user',
content: `提取值得长期记忆的信息:\n${messages.map(m => `${m.role}: ${m.content}`).join('\n')}\n\n输出 JSON 数组,每项包含 content 和 type。无则输出 []。`
}],
response_format: { type: 'json_object' }
});
const memories = JSON.parse(response.content).memories || [];
for (const memory of memories) {
await this.store(memory.content, { type: memory.type });
}
}
}7.3 工作记忆
javascript
class WorkingMemory {
constructor() {
this.scratchpad = {};
this.facts = [];
this.goals = [];
}
set(key, value) {
this.scratchpad[key] = { value, timestamp: Date.now() };
}
get(key) {
return this.scratchpad[key]?.value;
}
addFact(fact) {
this.facts.push({ content: fact, timestamp: Date.now() });
}
toSystemPrompt() {
const scratchpad = Object.entries(this.scratchpad).map(([k, v]) => `${k}: ${v.value}`);
return `
## 当前工作记忆
${scratchpad.length > 0 ? `临时数据:\n${scratchpad.join('\n')}` : ''}
${this.facts.length > 0 ? `已确认事实:\n${this.facts.map(f => f.content).join('\n')}` : ''}
${this.goals.length > 0 ? `当前目标:\n${this.goals.join('\n')}` : ''}
`.trim();
}
}8. 上下文管理
8.1 优先级管理
javascript
class ContextManager {
constructor(maxTokens = 100000) {
this.maxTokens = maxTokens;
this.priorities = {
system_prompt: 1,
user_question: 2,
relevant_code: 3,
recent_messages: 4,
retrieved_docs: 5,
history_summary: 6
};
}
buildContext(components) {
const sorted = Object.entries(components)
.sort(([a], [b]) => this.priorities[a] - this.priorities[b]);
let totalTokens = 0;
const included = [];
for (const [type, content] of sorted) {
const tokens = estimateTokens(content);
if (totalTokens + tokens <= this.maxTokens) {
included.push({ type, content });
totalTokens += tokens;
} else {
const remaining = this.maxTokens - totalTokens;
if (remaining > 500) {
included.push({
type,
content: truncateToTokens(content, remaining),
truncated: true
});
}
break;
}
}
return included;
}
}8.2 动态上下文选择
javascript
async function selectContext(question, availableContext) {
const response = await llm.chat({
messages: [{
role: 'user',
content: `用户问题: ${question}\n\n选择最相关的上下文(最多 5 个):\n${availableContext.map((ctx, i) => `[${i}] ${ctx.title}\n${ctx.preview}`).join('\n\n')}\n\n输出编号,逗号分隔。`
}]
});
const indices = response.content.split(',').map(s => parseInt(s.trim()));
return indices.map(i => availableContext[i]).filter(Boolean);
}9. 实战:代码库问答系统
javascript
class CodebaseQA {
constructor() {
this.vectorDB = new ChromaClient();
this.memory = new ConversationMemory();
this.longTermMemory = new LongTermMemory(this.vectorDB);
}
async initialize(repoPath) {
this.collection = await this.vectorDB.createCollection({ name: 'codebase' });
await this.indexRepository(repoPath);
}
async chat(question) {
const relevantCode = await this.retrieveCode(question);
const memories = await this.longTermMemory.recall(question, 3);
const context = this.buildContext(relevantCode, memories);
this.memory.add('user', question);
const history = this.memory.getContext();
const response = await llm.chat({
messages: [
{
role: 'system',
content: `你是代码库专家。\n\n## 相关代码\n${context.code}\n\n## 相关记忆\n${context.memories}\n\n规则:\n- 引用代码时说明文件路径\n- 不确定时明确说明\n- 提供具体代码示例`
},
...history,
{ role: 'user', content: question }
]
});
this.memory.add('assistant', response.content);
await this.longTermMemory.processConversation([
{ role: 'user', content: question },
{ role: 'assistant', content: response.content }
]);
return {
answer: response.content,
sources: relevantCode.map(c => c.metadata.file)
};
}
}10. 关键要点
- 分块策略: 语义完整的分块提高检索质量
- 混合检索: 结合向量和关键词检索
- 重排序: 用更精确的模型重排检索结果
- 分层记忆: 工作记忆 + 对话记忆 + 长期记忆
- 上下文管理: 在有限窗口内优化信息选择
- 持续优化: 监控检索质量,迭代改进