libs/core/langchain_core/utils/json.py PYTHON 229 lines View on github.com → Search inside
1"""Utilities for JSON."""23from __future__ import annotations45import json6import re7from typing import TYPE_CHECKING, Any89from langchain_core.exceptions import OutputParserException1011if TYPE_CHECKING:12    from collections.abc import Callable131415def _replace_new_line(match: re.Match[str]) -> str:16    """Replace newline characters in a regex match with escaped sequences.1718    Args:19        match: Regex match object containing the string to process.2021    Returns:22        String with newlines, carriage returns, tabs, and quotes properly escaped.23    """24    value = match.group(2)25    value = re.sub(r"\n", r"\\n", value)26    value = re.sub(r"\r", r"\\r", value)27    value = re.sub(r"\t", r"\\t", value)28    value = re.sub(r'(?<!\\)"', r"\"", value)2930    return match.group(1) + value + match.group(3)313233def _custom_parser(multiline_string: str | bytes | bytearray) -> str:34    r"""Custom parser for multiline strings.3536    The LLM response for `action_input` may be a multiline string containing unescaped37    newlines, tabs or quotes. This function replaces those characters with their escaped38    counterparts. (newlines in JSON must be double-escaped: `\\n`).3940    Returns:41        The modified string with escaped newlines, tabs and quotes.42    """43    if isinstance(multiline_string, (bytes, bytearray)):44        multiline_string = multiline_string.decode()4546    return re.sub(47        r'("action_input"\:\s*")(.*?)(")',48        _replace_new_line,49        multiline_string,50        flags=re.DOTALL,51    )525354# Adapted from https://github.com/KillianLucas/open-interpreter/blob/5b6080fae1f8c68938a1e4fa8667e3744084ee21/interpreter/utils/parse_partial_json.py55# MIT License565758def parse_partial_json(s: str, *, strict: bool = False) -> Any:59    """Parse a JSON string that may be missing closing braces.6061    Args:62        s: The JSON string to parse.63        strict: Whether to use strict parsing.6465    Returns:66        The parsed JSON object as a Python dictionary.67    """68    # Attempt to parse the string as-is.69    try:70        return json.loads(s, strict=strict)71    except json.JSONDecodeError:72        pass7374    # Initialize variables.75    new_chars = []76    stack = []77    is_inside_string = False78    escaped = False7980    # Process each character in the string one at a time.81    for char in s:82        new_char = char83        if is_inside_string:84            if char == '"' and not escaped:85                is_inside_string = False86            elif char == "\n" and not escaped:87                new_char = (88                    "\\n"  # Replace the newline character with the escape sequence.89                )90            elif char == "\\":91                escaped = not escaped92            else:93                escaped = False94        elif char == '"':95            is_inside_string = True96            escaped = False97        elif char == "{":98            stack.append("}")99        elif char == "[":100            stack.append("]")101        elif char in {"}", "]"}:102            if stack and stack[-1] == char:103                stack.pop()104            else:105                # Mismatched closing character; the input is malformed.106                return None107108        # Append the processed character to the new string.109        new_chars.append(new_char)110111    # If we're still inside a string at the end of processing,112    # we need to close the string.113    if is_inside_string:114        if escaped:  # Remove unterminated escape character115            new_chars.pop()116        new_chars.append('"')117118    # Reverse the stack to get the closing characters.119    stack.reverse()120121    # Try to parse mods of string until we succeed or run out of characters.122    while new_chars:123        # Close any remaining open structures in the reverse124        # order that they were opened.125        # Attempt to parse the modified string as JSON.126        try:127            return json.loads("".join(new_chars + stack), strict=strict)128        except json.JSONDecodeError:129            # If we still can't parse the string as JSON,130            # try removing the last character131            new_chars.pop()132133    # If we got here, we ran out of characters to remove134    # and still couldn't parse the string as JSON, so return the parse error135    # for the original string.136    return json.loads(s, strict=strict)137138139_json_markdown_re = re.compile(r"```(json)?(.*)", re.DOTALL)140141142def parse_json_markdown(143    json_string: str, *, parser: Callable[[str], Any] = parse_partial_json144) -> Any:145    """Parse a JSON string from a Markdown string.146147    Args:148        json_string: The Markdown string.149        parser: The parser to use.150151    Returns:152        The parsed JSON object as a Python dictionary.153    """154    try:155        return _parse_json(json_string, parser=parser)156    except json.JSONDecodeError:157        # Try to find JSON string within triple backticks158        match = _json_markdown_re.search(json_string)159160        # If no match found, assume the entire string is a JSON string161        # Else, use the content within the backticks162        json_str = json_string if match is None else match.group(2)163    return _parse_json(json_str, parser=parser)164165166_json_strip_chars = " \n\r\t`"167168169def _parse_json(170    json_str: str, *, parser: Callable[[str], Any] = parse_partial_json171) -> Any:172    """Parse a JSON string, handling special characters and whitespace.173174    Strips whitespace, newlines, and backticks from the start and end of the string,175    then processes special characters before parsing.176177    Args:178        json_str: The JSON string to parse.179        parser: Optional custom parser function.180181    Returns:182        Parsed JSON object.183    """184    # Strip whitespace,newlines,backtick from the start and end185    json_str = json_str.strip(_json_strip_chars)186187    # handle newlines and other special characters inside the returned value188    json_str = _custom_parser(json_str)189190    # Parse the JSON string into a Python dictionary191    return parser(json_str)192193194def parse_and_check_json_markdown(text: str, expected_keys: list[str]) -> dict:195    """Parse and check a JSON string from a Markdown string.196197    Checks that it contains the expected keys.198199    Args:200        text: The Markdown string.201        expected_keys: The expected keys in the JSON string.202203    Returns:204        The parsed JSON object as a Python dictionary.205206    Raises:207        OutputParserException: If the JSON string is invalid or does not contain208            the expected keys.209    """210    try:211        json_obj = parse_json_markdown(text)212    except json.JSONDecodeError as e:213        msg = f"Got invalid JSON object. Error: {e}"214        raise OutputParserException(msg) from e215    if not isinstance(json_obj, dict):216        error_message = (217            f"Expected JSON object (dict), but got: {type(json_obj).__name__}. "218        )219        raise OutputParserException(error_message, llm_output=text)220221    for key in expected_keys:222        if key not in json_obj:223            msg = (224                f"Got invalid return object. Expected key `{key}` "225                f"to be present, but got {json_obj}"226            )227            raise OutputParserException(msg)228    return json_obj

Code quality findings 3

Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(multiline_string, (bytes, bytearray)):
Ensure functions have docstrings for documentation
missing-docstring
def parse_json_markdown(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if not isinstance(json_obj, dict):

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.