句子滑动窗口检索)
一、介绍

1、背景

在构建现代 RAG(Retrieval-Augmented Generation)系统时,最核心的矛盾之一是:检索需要“精确命中”,但生成需要“足够上下文”。如果只做粗粒度切块(chunk),很容易出现命中不准;如果切得太细,又会导致上下文缺失。节点-句子滑动窗口检索正是为了解决这个问题而提出的一种工程化方案,它在 LlamaIndex 等框架中被广泛使用。

2、核心思想——从小到大

保证检索足够精准,同时不丢上下文语义。检索用“句子级别”,生成用“窗口级别”。也就是说,在索引阶段,我们把文档拆分成最小语义单位——句子,每个句子作为一个独立节点参与向量化或关键词检索;但在返回结果时,并不只返回这一句话,而是返回它前后扩展的一段“窗口文本”。

这个窗口通常包含:当前句子、前面 N 句、后面 N 句。

句子滑动窗口检索想要的是:召回精度接近句子级,上下文完整性接近段落级。

3、它和普通“滑动窗口切片”有什么区别?

| 方式 | 切分方式 | 存储什么 | 检索时怎么用 |
| --- | --- | --- | --- |
| 普通滑动窗口切片 | 按固定长度文本块 + overlap | 直接存重叠 chunk | 直接拿 chunk 给 LLM |
| 句子滑动窗口检索 | 按句子切 | 存“中心句 + 前后窗口” | 先召回句子,再用窗口替换 |
| 传统 window-based passage retrieval | 按固定窗口在全文上滑动 | 每个窗口都算一个 passage | 取得分最高的窗口 |

二、实现方法

LlamaIndex 官方把这套模式叫 SentenceWindowNodeParser + MetadataReplacementPostProcessor:每个 node 是一个句子,metadata 里保存这个句子的上下文窗口,检索后再用窗口替换原句。

from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

# 1. 句子级切分 + 窗口写入 metadata
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=2,  # 每边 2 句
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)
nodes = node_parser.get_nodes_from_documents(documents)

# 2. 建索引
index = VectorStoreIndex(nodes)

# 3. 检索后用 window 替换原句
query_engine = index.as_query_engine(
    similarity_top_k=3,
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
)
resp = query_engine.query("电热水壶第一次使用前要做什么")
print(resp)

流程

SentenceWindowNodeParser 的做法:每个 sentence 单独变成 node,如:

node1
text: Spring Boot 启动时会创建 ApplicationContext。
metadata: {"window": "Spring Boot 启动时会创建 ApplicationContext。ApplicationReadyEvent 会在应用启动完成后触发。"}

node2
text: ApplicationReadyEvent 会在应用启动完成后触发。
metadata: {"window": "Spring Boot 启动时会创建 ApplicationContext。ApplicationReadyEvent 会在应用启动完成后触发。开发者可以监听该事件执行初始化逻辑。"}

node3
text: 开发者可以监听该事件执行初始化逻辑。
metadata: {"window": "ApplicationReadyEvent 会在应用启动完成后触发。开发者可以监听该事件执行初始化逻辑。例如启动 HTTP Server。"}

检索阶段:真正参与 embedding / 检索的是单句 node.text。

召回完成后,LlamaIndex 不会直接把

    ApplicationReadyEvent 会在应用启动完成后触发。

发给 LLM,而是把 node 替换成 metadata.window:

    Spring Boot 启动时会创建 ApplicationContext。ApplicationReadyEvent 会在应用启动完成后触发。开发者可以监听该事件执行初始化逻辑。

这就叫 MetadataReplacementPostProcessor。

三、demo

1、common

import sys
from pathlib import Path

import httpx
import regex
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.schema import Document
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

sys.stdout.reconfigure(encoding="utf-8")

DEMO_DIR = Path(__file__).resolve().parents[1]
DOC_DIR = DEMO_DIR / "docs" / "small_appliance_kb"
COLLECTION_NAME = "window_db"
QDRANT_URL = "http://localhost:6333"
WINDOW_SIZE = 2
EMBEDDING_MODEL = "text-embedding-3-small"
LLM_MODEL = "gpt-5.1"
BASE_URL = "https://llm-xxx.xxxx/v1"
API_KEY = "l******"
WINDOW_METADATA_KEY = "window"
ORIGINAL_TEXT_METADATA_KEY = "original_text"


class CompatQdrantClient(QdrantClient):
    # Keep LangChain compatibility with the older search() call path.
    def search(
        self,
        *,
        collection_name,
        query_vector,
        query_filter=None,
        search_params=None,
        limit=10,
        offset=0,
        with_payload=True,
        with_vectors=False,
        score_threshold=None,
        consistency=None,
        **kwargs,
    ):
        return self.query_points(
            collection_name=collection_name,
            query=query_vector,
            query_filter=query_filter,
            search_params=search_params,
            limit=limit,
            offset=offset,
            with_payload=with_payload,
            with_vectors=with_vectors,
            score_threshold=score_threshold,
            consistency=consistency,
            **kwargs,
        ).points


def make_client() -> CompatQdrantClient:
    return CompatQdrantClient(url=QDRANT_URL)


def make_embeddings() -> OpenAIEmbeddings:
    # Step 1: build the embedding model used by Qdrant.
    return OpenAIEmbeddings(
        model=EMBEDDING_MODEL,
        api_key="dummy",
        base_url=BASE_URL,
        default_headers={"X-Api-Key": API_KEY},
        http_client=httpx.Client(trust_env=False),
    )


def make_vectorstore(client: QdrantClient) -> QdrantVectorStore:
    return QdrantVectorStore(
        client=client,
        collection_name=COLLECTION_NAME,
        embedding=make_embeddings(),
    )


def make_llm() -> ChatOpenAI:
    return ChatOpenAI(
        model=LLM_MODEL,
        base_url=BASE_URL,
        api_key="dummy",
        default_headers={"X-Api-Key": API_KEY},
        http_client=httpx.Client(trust_env=False),
    )


def list_doc_paths() -> list[Path]:
    if not DOC_DIR.exists():
        return []
    return sorted(
        path
        for path in DOC_DIR.iterdir()
        if path.is_file() and path.suffix.lower() in {".md", ".txt"}
    )


def split_sentences(text: str) -> list[str]:
    sentences: list[str] = []
    normalized = text.replace("\r\n", "\n").strip()
    if not normalized:
        return []
    for line in normalized.split("\n"):
        line = line.strip()
        if not line:
            continue
        parts = [
            part.strip()
            for part in regex.split(r"(?<=[。!?])\s*", line, flags=regex.VERSION1)
            if part.strip()
        ]
        sentences.extend(parts or [line])
    return sentences


def build_sentence_nodes(doc_paths: list[Path] | None = None, window_size: int = WINDOW_SIZE):
    if doc_paths is None:
        doc_paths = list_doc_paths()
    if not doc_paths:
        return []
    documents = [
        Document(
            text=path.read_text(encoding="utf-8"),
            metadata={"source": path.name},
        )
        for path in doc_paths
    ]
    parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_size,
        window_metadata_key=WINDOW_METADATA_KEY,
        original_text_metadata_key=ORIGINAL_TEXT_METADATA_KEY,
        sentence_splitter=split_sentences,
    )
    return parser.get_nodes_from_documents(documents)

2、store

from qdrant_client.models import Distance, VectorParams

from common import (
    COLLECTION_NAME,
    DOC_DIR,
    QDRANT_URL,
    build_sentence_nodes,
    list_doc_paths,
    make_client,
    make_vectorstore,
)

client = make_client()
doc_paths = list_doc_paths()
if not doc_paths:
    raise SystemExit(f"no documents found in: {DOC_DIR}")

nodes = build_sentence_nodes(doc_paths)
if not nodes:
    raise SystemExit(f"no sentences generated from: {DOC_DIR}")

# Rebuild the collection so the demo always stays in sync with the docs folder.
if client.collection_exists(COLLECTION_NAME):
    client.delete_collection(COLLECTION_NAME)

client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

vectorstore = make_vectorstore(client)
vectorstore.add_texts(
    [node.text for node in nodes],
    metadatas=[dict(node.metadata or {}) for node in nodes],
)

print(f"stored sentences: {len(nodes)}")
print(f"collection: {COLLECTION_NAME}")
print(f"qdrant: {QDRANT_URL}")
print("sources:")
for path in doc_paths:
    print(f"- {path.name}")

client.close()

执行

3、ask

from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.schema import NodeWithScore, TextNode
from langchain_core.prompts import ChatPromptTemplate

from common import COLLECTION_NAME, WINDOW_METADATA_KEY, make_client, make_llm, make_vectorstore

client = make_client()
if not client.collection_exists(COLLECTION_NAME):
    raise SystemExit("run store.py first to build the collection.")

vectorstore = make_vectorstore(client)
question = "电热水壶第一次使用前要做什么"

# Step 1: retrieve the most relevant sentence nodes.
hits = vectorstore.similarity_search_with_score(question, k=3)
nodes = [
    NodeWithScore(
        node=TextNode(text=doc.page_content, metadata=dict(doc.metadata or {})),
        score=score,
    )
    for doc, score in hits
]

# Step 2: replace each sentence with its surrounding window.
postprocessor = MetadataReplacementPostProcessor(target_metadata_key=WINDOW_METADATA_KEY)
processed_nodes = postprocessor.postprocess_nodes(nodes)
context = "\n\n".join(node.node.text for node in processed_nodes)

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "你是小家电智能客服,只能根据上下文回答,尽量简洁直接,不要补充上下文之外的信息。",
        ),
        ("human", "上下文:\n{context}\n\n问题: {question}"),
    ]
)

llm = make_llm()
resp = llm.invoke(prompt.format_messages(context=context, question=question))

print("\nanswer:")
print(resp.content)

client.close()

执行