Ryanhub - file viewer
filename: chat/parser.py
branch: main
back to repo
import json


def parse_router_output(text):
    """
    Strictly parse the router's raw output into a tool call.

    Contract expected from the router:
    - The output is exactly ONE JSON object.
    - It carries a string under the key "tool". The key "arguments" is
      optional; when it is absent or not an object it is replaced by {}.

    What this function deliberately does NOT do:
    - No recovery heuristics.
    - No markdown stripping.
    - No regex scanning.
    Invalid output is reported immediately.

    Returns:
        {"tool": str, "arguments": dict}
        OR
        {"error": "parse_error", "message": str, "raw": ...}
        where "raw" is the original input when it is not a string or not
        valid JSON, and the decoded JSON value for later validation failures.
    """

    def _failure(message, raw):
        # Single place that shapes every error result.
        return {
            "error": "parse_error",
            "message": message,
            "raw": raw,
        }

    if not isinstance(text, str):
        return _failure("router output is not text", text)

    try:
        payload = json.loads(text)
    except Exception as exc:
        # Any decoding failure is reported as a parse error, not raised.
        return _failure(f"invalid JSON: {exc}", text)

    if not isinstance(payload, dict):
        return _failure("parsed JSON is not an object", payload)

    if "tool" not in payload:
        return _failure("router must return at least key 'tool'", payload)

    tool_name = payload["tool"]
    if not isinstance(tool_name, str):
        return _failure("tool must be a string", payload)

    call_args = payload.get("arguments")
    return {
        "tool": tool_name,
        # Missing or non-object arguments fall back to an empty dict.
        "arguments": call_args if isinstance(call_args, dict) else {},
    }


def _strip_fenced_json(text):
    s = str(text or "").strip()
    if not s.startswith("```"):
        return s
    lines = s.splitlines()
    if len(lines) >= 2 and lines[-1].strip() == "```":
        return "\n".join(lines[1:-1]).strip()
    return s


def _recover_first_json_object(text):
    s = str(text or "").strip()
    try:
        obj, _ = json.JSONDecoder().raw_decode(s)
        return json.dumps(obj, separators=(",", ":"))
    except Exception:
        return None


def parse_with_recovery(raw_text, strict_parser):
    """
    Parse ``raw_text`` with ``strict_parser``, retrying on cleaned-up variants.

    Attempts, in order, stopping at the first result that is a dict without
    a truthy "error" entry:
      1. ``raw_text`` exactly as given.
      2. The output of ``_strip_fenced_json(raw_text)``, tried only when it
         differs from the stringified raw text.
      3. The text returned by ``_recover_first_json_object`` for the
         stripped text, when it returns something.

    Args:
        raw_text: The router output to parse. It is handed to
            ``strict_parser`` unchanged on the first attempt.
        strict_parser: Callable taking one argument and returning a dict;
            a truthy "error" entry marks failure.

    Returns:
        The first successful result, or, when every attempt fails, the
        result of strict-parsing the unmodified ``raw_text``.
    """

    def _succeeded(result):
        return isinstance(result, dict) and not result.get("error")

    first_result = strict_parser(raw_text)
    if _succeeded(first_result):
        return first_result

    stripped = _strip_fenced_json(raw_text)
    if stripped != str(raw_text or ""):
        retry = strict_parser(stripped)
        if _succeeded(retry):
            return retry

    recovered = _recover_first_json_object(stripped)
    if recovered:
        retry = strict_parser(recovered)
        if _succeeded(retry):
            return retry

    # Report the failure for the untouched input. The first attempt already
    # produced it, so it is reused rather than parsing raw_text a second time.
    return first_result


def parse_router_reply(raw_text):
    """
    Parse a raw router reply, with recovery fallbacks around the strict parser.

    Delegates to ``parse_with_recovery`` using ``parse_router_output`` as the
    strict parser and returns its result unchanged.
    """
    return parse_with_recovery(
        raw_text=raw_text,
        strict_parser=parse_router_output,
    )