Join us May 13th & May 14th at Interrupt, the Agent Conference by LangChain. Buy tickets >
[
  # 'key' 是指标名称
  # 'score' 是数值指标的值
  {"key": string, "score": number},
  # 'value' 是分类指标的值
  {"key": string, "value": string},
  ...  # 您可以记录任意多个
]
{results: [{ key: string, score: number }, ...]};
langsmith>=0.2.0
langsmith@0.1.32
def multiple_scores(outputs: dict, reference_outputs: dict) -> list[dict]:
    """Return several metrics from a single evaluator call.

    Args:
        outputs: Accepted as part of the evaluator signature; not read by
            this placeholder implementation.
        reference_outputs: Accepted as part of the evaluator signature; not
            read by this placeholder implementation.

    Returns:
        A list of dicts, one per metric, each holding the metric name under
        "key" and its numeric value under "score".
    """
    # Placeholder values: replace with the actual evaluation logic.
    precision = 0.8
    recall = 0.9
    f1 = 0.85
    return [
        {"key": "precision", "score": precision},
        {"key": "recall", "score": recall},
        {"key": "f1", "score": f1},
    ]