import wikipedia as wp
from openai import OpenAI
from langsmith import traceable, wrappers
# Module-level OpenAI client, passed through LangSmith's wrap_openai wrapper.
# It is created once at import time and reused by the chat-completion calls
# in generate_wiki_search and generate_answer.
oai_client = wrappers.wrap_openai(OpenAI())
@traceable
def generate_wiki_search(question: str) -> str:
    """Produce a Wikipedia search query for the given user question.

    Sends a system prompt asking for a bare search query, plus the question
    as the user message, to the chat model.

    Args:
        question: The user's question.

    Returns:
        The content of the first choice in the model's completion.
    """
    # The prompt pieces are joined without separators, matching the
    # adjacent-literal concatenation this replaces.
    system_prompt = "".join([
        "生成一个搜索查询以传入维基百科来回答用户的问题。",
        "仅返回搜索查询,不要返回其他内容。",
        "这将直接传递给维基百科搜索引擎。",
    ])
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": question}
    completion = oai_client.chat.completions.create(
        messages=[system_message, user_message],
        model="gpt-4.1-mini",
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
@traceable(run_type="retriever")
def retrieve(query: str) -> list:
    """Fetch up to two Wikipedia page summaries matching the query.

    Requests ten candidate titles from the Wikipedia search and loads them in
    order, skipping any title that resolves to a disambiguation page, until
    two documents have been collected.

    Args:
        query: Search string passed to the Wikipedia search.

    Returns:
        A list of at most two dicts, each with "page_content" (the page
        summary), "type" (always "Document") and "metadata" (a dict holding
        the page "url"). The list is shorter, possibly empty, when the search
        yields fewer than two usable pages.
    """
    results = []
    for term in wp.search(query, results=10):
        try:
            page = wp.page(term, auto_suggest=False)
            results.append({
                "page_content": page.summary,
                "type": "Document",
                "metadata": {"url": page.url}
            })
        except wp.DisambiguationError:
            # Ambiguous title: skip it and try the next candidate.
            continue
        if len(results) >= 2:
            break
    # Always return the list. Returning only from inside the loop made the
    # function yield None whenever fewer than two pages could be loaded.
    return results
@traceable
def generate_answer(question: str, context: str) -> str:
    """Answer the question using the retrieved context.

    The context is embedded in the system prompt, which instructs the model
    to answer based only on that content; the question is sent as the user
    message.

    Args:
        question: The user's question.
        context: Retrieved text to base the answer on.

    Returns:
        The content of the first choice in the model's completion.
    """
    system_prompt = f"仅根据以下内容回答用户的问题:\n\n{context}"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]
    completion = oai_client.chat.completions.create(
        messages=conversation,
        model="gpt-4.1-mini",
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
@traceable
def qa_pipeline(question: str) -> str:
    """Run the full question-answering pipeline.

    Generates a search query from the question, retrieves documents for it,
    joins their "page_content" fields with blank lines, and passes that text
    as context to the answer generator.

    Args:
        question: The user's question.

    Returns:
        The value returned by generate_answer for the question and context.
    """
    search_query = generate_wiki_search(question)
    documents = retrieve(search_query)
    summaries = (document["page_content"] for document in documents)
    return generate_answer(question, "\n\n".join(summaries))