Chapter 10

实战:构建数学解题 + 代码调试推理 Agent

完整端到端项目:集成 Claude Extended Thinking,构建能自我验证、自我纠错的推理 Agent。

系统架构

用户输入
    │
    ▼
┌─────────────────────────────────────┐
│         任务分类器(Haiku)          │
│  判断: math / code_debug / general  │
└──────────────────┬──────────────────┘
                   │
        ┌──────────┴──────────┐
        │                     │
        ▼                     ▼
┌──────────────┐    ┌──────────────────┐
│  数学推理器  │    │  代码调试 Agent  │
│  (Sonnet +   │    │  (Sonnet +       │
│   thinking   │    │   thinking +     │
│   + 验证)    │    │   代码执行)      │
└──────┬───────┘    └──────────┬───────┘
       │                       │
       └──────────┬────────────┘
                  ▼
        ┌─────────────────┐
        │ 结果格式化输出  │
        └─────────────────┘

数学解题器(含自动验证)

import anthropic
import sympy
from sympy.parsing.latex import parse_latex
import re

# Shared Anthropic API client; the agent classes in this file call client.messages.create on it.
# NOTE(review): constructed with no arguments - presumably credentials come from the environment; confirm.
client = anthropic.Anthropic()

class MathReasoningAgent:
    """Math reasoning agent: solve a problem with extended thinking, then check the answer.

    The model is instructed to put its final answer inside a LaTeX "boxed"
    macro; that answer is extracted from the reply and optionally passed
    through a sympy LaTeX parse as a lightweight sanity check.
    """

    def solve(self, problem: str, verify: bool = True) -> dict:
        """Solve *problem* and return a result dictionary.

        Args:
            problem: The problem statement, sent verbatim as the user message.
            verify: When true and an answer was extracted, run
                ``verify_with_sympy`` on it.

        Returns:
            A dict with the keys ``problem``, ``thinking_tokens``,
            ``solution``, ``answer`` and ``verified``. ``answer`` is ``""``
            when no boxed answer was found. ``verified`` is ``None`` when
            verification was skipped or could not be performed.
        """
        # Step 1: reason toward a solution.
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=16000,
            thinking={"type": "enabled", "budget_tokens": 10000},
            system="""你是数学解题专家。
最终答案必须用 \\boxed{} 格式,例如 \\boxed{42} 或 \\boxed{x=3}。
如果是多步骤计算,最后一行必须是 \\boxed{答案}。""",
            messages=[{"role": "user", "content": problem}]
        )

        thinking = next((b.thinking for b in response.content if b.type == "thinking"), "")
        # Default to "" so a reply without a text block does not raise StopIteration.
        solution = next((b.text for b in response.content if b.type == "text"), "")

        # Pull the final answer out of the boxed macro.
        answer = self.extract_boxed_answer(solution)

        result = {
            "problem": problem,
            # Whitespace-separated word count of the thinking text: only a
            # rough proxy for real token usage.
            "thinking_tokens": len(thinking.split()),
            "solution": solution,
            "answer": answer,
            "verified": None
        }

        # Step 2: sanity-check the answer with sympy.
        if verify and answer:
            result["verified"] = self.verify_with_sympy(problem, answer)

        return result

    def extract_boxed_answer(self, text: str) -> str:
        r"""Return the contents of the last balanced ``\boxed{...}`` in *text*.

        Braces are matched by depth, so nested groups such as
        ``\boxed{\frac{1}{2}}`` are returned whole. The last box wins because
        the system prompt asks for the final answer on the last line. Empty
        or unterminated boxes are ignored. Surrounding whitespace is stripped.

        Returns:
            The answer text, or ``""`` when no usable box exists.
        """
        marker = "\\boxed{"
        answer = ""
        pos = text.find(marker)
        while pos != -1:
            start = pos + len(marker)
            depth = 1
            for idx in range(start, len(text)):
                ch = text[idx]
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    if depth == 0:
                        candidate = text[start:idx].strip()
                        if candidate:
                            answer = candidate
                        break
            pos = text.find(marker, start)
        return answer

    def verify_with_sympy(self, problem: str, answer: str) -> "bool | None":
        """Check that *answer* parses as a LaTeX expression.

        This only confirms the answer is well-formed LaTeX that sympy can
        parse; it does not check the answer against *problem*, which is
        currently unused.

        Returns:
            ``True`` if parsing produced an expression, ``False`` if it
            produced ``None``, and ``None`` if parsing raised (verification
            not possible).
        """
        try:
            expr = parse_latex(answer)
        except Exception:
            # Narrower than a bare except: KeyboardInterrupt and SystemExit
            # still propagate.
            return None  # could not verify
        return expr is not None

代码调试 Agent(执行反馈循环)

import subprocess
import tempfile
import os

class CodeDebuggingAgent:
    """Code debugging agent: analyse -> fix -> execute -> iterate.

    Each iteration asks the model for a corrected version of the code, runs
    that version in a child Python process, and feeds any resulting error
    into the next iteration. It stops after ``MAX_ITERATIONS`` attempts.
    """

    MAX_ITERATIONS = 3

    def debug(self, code: str, error: str = "") -> dict:
        """Repeatedly repair *code* until it runs with exit status 0.

        Args:
            code: The source code to repair.
            error: Optional error message observed for *code*; included in
                the first prompt when non-empty.

        Returns:
            A dict with the keys ``status`` (``"fixed"`` or ``"failed"``),
            ``final_code``, ``iterations`` and ``history``. ``final_code`` is
            the working code on success and ``None`` on failure. ``history``
            holds one entry per attempt with its code and execution result.
        """
        current_code = code
        history = []

        for iteration in range(1, self.MAX_ITERATIONS + 1):
            # 1. Ask the model to analyse and repair the code.
            error_line = "错误信息:" + error if error else ""
            fix_prompt = f"""分析以下代码的问题并修复:

```python
{current_code}
```
{error_line}

输出修复后的完整代码(只输出代码,不要说明):"""

            response = client.messages.create(
                model="claude-sonnet-4-6",
                max_tokens=8000,
                thinking={"type": "enabled", "budget_tokens": 5000},
                messages=[{"role": "user", "content": fix_prompt}]
            )

            # Default to "" so a reply without a text block does not raise
            # StopIteration.
            reply = next((b.text for b in response.content if b.type == "text"), "")
            fixed_code = self.extract_code(reply)

            # 2. Run the candidate to verify it.
            exec_result = self.safe_execute(fixed_code)
            history.append({
                "iteration": iteration,
                "fixed_code": fixed_code,
                "exec_result": exec_result
            })

            if exec_result["success"]:
                return {"status": "fixed", "final_code": fixed_code,
                        "iterations": iteration, "history": history}

            # Carry the new code and its error into the next iteration.
            current_code = fixed_code
            error = exec_result["error"]

        # Same keys as the success result; no working code was produced.
        return {"status": "failed", "final_code": None,
                "iterations": len(history), "history": history}

    def extract_code(self, text: str) -> str:
        """Return the code contained in a model reply.

        If *text* contains a Markdown code fence, the body of the first
        fenced block is returned, dropping the opening fence line and its
        language tag. An unterminated fence yields everything after the
        opening line. Without any fence the whole reply is treated as code.
        """
        fence = "```"
        opening = text.find(fence)
        if opening == -1:
            return text.strip()
        line_end = text.find("\n", opening + len(fence))
        if line_end == -1:
            return text.strip()
        closing = text.find(fence, line_end + 1)
        body = text[line_end + 1:] if closing == -1 else text[line_end + 1:closing]
        return body.strip("\n")

    def safe_execute(self, code: str) -> dict:
        """Run *code* in a child Python process with a 10-second timeout.

        SECURITY NOTE: despite the name, no sandboxing is applied. The code
        runs as an ordinary subprocess with the caller's privileges, so only
        use this with code you are prepared to execute on this machine.

        Returns:
            A dict with the keys ``success`` (exit status was 0), ``stdout``
            and ``error`` (stderr, an exit-code message when stderr is empty
            on failure, or a timeout message).
        """
        # Local import: sys is not among this file's top-level imports.
        import sys

        # Write as UTF-8, which the interpreter assumes for source files, so
        # non-ASCII code does not depend on the locale encoding.
        with tempfile.NamedTemporaryFile(
            suffix=".py", mode="w", encoding="utf-8", delete=False
        ) as f:
            f.write(code)
            tmpfile = f.name

        try:
            result = subprocess.run(
                # Use the running interpreter; a bare "python" may be missing
                # from PATH or point at a different installation.
                [sys.executable or "python", tmpfile],
                capture_output=True, text=True, timeout=10
            )
            error = result.stderr
            if result.returncode != 0 and not error.strip():
                # Give the next repair iteration something to act on.
                error = f"Process exited with code {result.returncode}"
            return {
                "success": result.returncode == 0,
                "stdout": result.stdout,
                "error": error
            }
        except subprocess.TimeoutExpired:
            return {"success": False, "stdout": "", "error": "Timeout (>10s)"}
        finally:
            os.unlink(tmpfile)

主入口:统一 Agent

class ReasoningAssistant:
    """Single entry point that routes each query to the matching agent."""

    def __init__(self):
        self.math_agent = MathReasoningAgent()
        self.code_agent = CodeDebuggingAgent()

    def classify_task(self, query: str) -> str:
        """Ask the classifier model to label *query*.

        Only the first 200 characters of the query are sent. The reply is
        returned stripped and lower-cased. An empty string is returned when
        the reply has no text block, which ``process`` treats as general.
        """
        response = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=10,
            messages=[{"role": "user", "content":
                f"分类为 math/code/general(只输出一个词):{query[:200]}"}]
        )
        label = next((b.text for b in response.content if b.type == "text"), "")
        return label.strip().lower()

    def extract_code_from_query(self, query: str) -> str:
        """Return the code portion of a user query.

        If *query* contains a Markdown code fence, the body of the first
        fenced block is returned, dropping the opening fence line and its
        language tag. An unterminated fence yields everything after the
        opening line. Without any fence the whole query is treated as code.
        """
        fence = "```"
        opening = query.find(fence)
        if opening == -1:
            return query.strip()
        line_end = query.find("\n", opening + len(fence))
        if line_end == -1:
            return query.strip()
        closing = query.find(fence, line_end + 1)
        body = query[line_end + 1:] if closing == -1 else query[line_end + 1:closing]
        return body.strip("\n")

    def process(self, query: str) -> dict:
        """Classify *query* and dispatch it.

        Returns:
            A dict with the keys ``type`` (``"math"``, ``"code"`` or
            ``"general"``) and ``result``. ``result`` is the agent's result
            dict for math and code tasks, and the reply text for general
            questions.
        """
        task_type = self.classify_task(query)

        # Substring checks tolerate labels with extra text or punctuation.
        if "math" in task_type:
            return {"type": "math", "result": self.math_agent.solve(query)}
        if "code" in task_type:
            code = self.extract_code_from_query(query)
            return {"type": "code", "result": self.code_agent.debug(code)}

        # General reasoning Q&A.
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=4000,
            thinking={"type": "enabled", "budget_tokens": 3000},
            messages=[{"role": "user", "content": query}]
        )
        # Default to "" so a reply without a text block does not raise
        # StopIteration.
        answer = next((b.text for b in response.content if b.type == "text"), "")
        return {"type": "general", "result": answer}

# Usage example
# NOTE: these statements run at module level, so importing or running this
# file builds the assistant and sends live API requests (a classification
# call, then the call made by whichever branch the query is routed to).
agent = ReasoningAssistant()
result = agent.process("求解:x² - 5x + 6 = 0")
print(result)

项目扩展方向

1. 加入 Web 界面(FastAPI + React)
2. 支持 LaTeX 渲染数学公式
3. 增加代码执行沙箱安全隔离(Docker)
4. 添加对话历史管理
5. 实现流式输出显示思考进度

课程完结

恭喜完成《AI 推理模型开发》全部 10 章!从慢思考原理、CoT 工程、Extended Thinking API,到生产级 Agent 架构,你已掌握推理模型的完整工程体系。AI 推理时代正在到来。