libs/core/langchain_core/output_parsers/openai_tools.py · langchain-ai/langchain

"""Parse tools for OpenAI tools output."""

import copy
import json
import logging
from json import JSONDecodeError
from typing import Annotated, Any

from pydantic import BaseModel, SkipValidation, ValidationError
from pydantic.v1 import BaseModel as BaseModelV1

from langchain_core.exceptions import OutputParserException
from langchain_core.messages import AIMessage, InvalidToolCall
from langchain_core.messages.tool import invalid_tool_call
from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.output_parsers.transform import BaseCumulativeTransformOutputParser
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.utils.json import parse_partial_json
from langchain_core.utils.pydantic import (
    TypeBaseModel,
)

logger = logging.getLogger(__name__)


def parse_tool_call(
    raw_tool_call: dict[str, Any],
    *,
    partial: bool = False,
    strict: bool = False,
    return_id: bool = True,
) -> dict[str, Any] | None:
    """Parse a single tool call.

    Args:
        raw_tool_call: The raw tool call to parse.
        partial: Whether to parse partial JSON.
        strict: Passed through as the `strict` flag of the JSON decoder. When
            `False`, non-JSON-compliant strings are allowed.
        return_id: Whether to return the tool call id.

    Returns:
        The parsed tool call, or `None` if `raw_tool_call` has no `'function'`
        key, or if `partial` is `True` and the arguments cannot be parsed.

    Raises:
        OutputParserException: If the tool call is not valid JSON.
    """
    if "function" not in raw_tool_call:
        return None

    arguments = raw_tool_call["function"]["arguments"]

    if partial:
        try:
            function_args = parse_partial_json(arguments, strict=strict)
        except (JSONDecodeError, TypeError):  # None args raise TypeError
            return None
    # Handle None or empty string arguments for parameter-less tools
    elif not arguments:
        function_args = {}
    else:
        try:
            function_args = json.loads(arguments, strict=strict)
        except JSONDecodeError as e:
            msg = (
                f"Function {raw_tool_call['function']['name']} arguments:\n\n"
                f"{arguments}\n\nare not valid JSON. "
                f"Received JSONDecodeError {e}"
            )
            raise OutputParserException(msg) from e
    parsed = {
        "name": raw_tool_call["function"]["name"] or "",
        "args": function_args or {},
    }
    if return_id:
        parsed["id"] = raw_tool_call.get("id")
        parsed = create_tool_call(**parsed)  # type: ignore[assignment,arg-type]
    return parsed


def make_invalid_tool_call(
    raw_tool_call: dict[str, Any],
    error_msg: str | None,
) -> InvalidToolCall:
    """Create an `InvalidToolCall` from a raw tool call.

    Args:
        raw_tool_call: The raw tool call. Must contain a `'function'` entry with
            `'name'` and `'arguments'` keys.
        error_msg: The error message.

    Returns:
        An `InvalidToolCall` instance with the error message.
    """
    return invalid_tool_call(
        name=raw_tool_call["function"]["name"],
        args=raw_tool_call["function"]["arguments"],
        id=raw_tool_call.get("id"),
        error=error_msg,
    )


def parse_tool_calls(
    raw_tool_calls: list[dict[str, Any]],
    *,
    partial: bool = False,
    strict: bool = False,
    return_id: bool = True,
) -> list[dict[str, Any]]:
    """Parse a list of tool calls.

    All tool calls are attempted before any error is reported, so that the raised
    exception describes every invalid tool call rather than only the first one.

    Args:
        raw_tool_calls: The raw tool calls to parse.
        partial: Whether to parse partial JSON.
        strict: Passed through as the `strict` flag of the JSON decoder. When
            `False`, non-JSON-compliant strings are allowed.
        return_id: Whether to return the tool call id.

    Returns:
        The parsed tool calls. Entries for which `parse_tool_call` returns a falsy
        value are skipped.

    Raises:
        OutputParserException: If any of the tool calls are not valid JSON.
    """
    final_tools: list[dict[str, Any]] = []
    exceptions = []
    for tool_call in raw_tool_calls:
        try:
            parsed = parse_tool_call(
                tool_call, partial=partial, strict=strict, return_id=return_id
            )
            if parsed:
                final_tools.append(parsed)
        except OutputParserException as e:
            exceptions.append(str(e))
            continue
    if exceptions:
        raise OutputParserException("\n\n".join(exceptions))
    return final_tools


class JsonOutputToolsParser(BaseCumulativeTransformOutputParser[Any]):
    """Parse tools from OpenAI response."""

    strict: bool = False
    """Whether to allow non-JSON-compliant strings.

    See: https://docs.python.org/3/library/json.html#encoders-and-decoders

    Useful when the parsed output may include unicode characters or new lines.
    """

    return_id: bool = False
    """Whether to return the tool call id."""

    first_tool_only: bool = False
    """Whether to return only the first tool call.

    If `False`, the result will be a list of tool calls, or an empty list if no tool
    calls are found.

    If `True`, and multiple tool calls are found, only the first one will be returned,
    and the other tool calls will be ignored.

    If no tool calls are found, `None` will be returned.
    """

    def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any:
        """Parse the result of an LLM call to a list of tool calls.

        Args:
            result: The result of the LLM call.
            partial: Whether to parse partial JSON.

                If `True`, the output will be a JSON object containing
                all the keys that have been returned so far.

                If `False`, the output will be the full JSON object.

        Returns:
            The parsed tool calls. Each tool call's name is exposed under the
            `'type'` key. If `first_tool_only` is `True`, a single tool call or
            `None` is returned instead of a list.

        Raises:
            OutputParserException: If the output is not valid JSON, or if the
                first generation is not a chat generation.
        """
        generation = result[0]
        if not isinstance(generation, ChatGeneration):
            msg = "This output parser can only be used with a chat generation."
            raise OutputParserException(msg)
        message = generation.message
        if isinstance(message, AIMessage) and message.tool_calls:
            # Copy each tool call so the message itself is never mutated.
            tool_calls = [dict(tc) for tc in message.tool_calls]
            if not self.return_id:
                for tool_call in tool_calls:
                    tool_call.pop("id", None)
        else:
            try:
                raw_tool_calls = copy.deepcopy(message.additional_kwargs["tool_calls"])
            except KeyError:
                # No tool calls at all: honor the documented `first_tool_only`
                # contract (`None` rather than an empty list).
                return None if self.first_tool_only else []
            tool_calls = parse_tool_calls(
                raw_tool_calls,
                partial=partial,
                strict=self.strict,
                return_id=self.return_id,
            )
        # for backwards compatibility
        for tc in tool_calls:
            tc["type"] = tc.pop("name")

        if self.first_tool_only:
            return tool_calls[0] if tool_calls else None
        return tool_calls

    def parse(self, text: str) -> Any:
        """Parse the output of an LLM call to a list of tool calls.

        Args:
            text: The output of the LLM call.

        Returns:
            The parsed tool calls.

        Raises:
            NotImplementedError: Always; this parser only supports `parse_result`.
        """
        raise NotImplementedError


class JsonOutputKeyToolsParser(JsonOutputToolsParser):
    """Parse tools from OpenAI response."""

    key_name: str
    """The type of tools to return."""

    def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any:
        """Parse the result of an LLM call to a list of tool calls.

        Only tool calls whose name equals `key_name` are returned.

        Args:
            result: The result of the LLM call.
            partial: Whether to parse partial JSON.

                If `True`, the output will be a JSON object containing
                all the keys that have been returned so far.

                If `False`, the output will be the full JSON object.

        Raises:
            OutputParserException: If the generation is not a chat generation.

        Returns:
            The parsed tool calls. If `return_id` is `False`, only the `args` of
            each matching tool call are returned. If `first_tool_only` is `True`,
            the first matching tool call (or `None`) is returned instead of a list.
        """
        generation = result[0]
        if not isinstance(generation, ChatGeneration):
            msg = "This output parser can only be used with a chat generation."
            raise OutputParserException(msg)
        message = generation.message
        if isinstance(message, AIMessage) and message.tool_calls:
            # Copy each tool call so the message itself is never mutated.
            parsed_tool_calls = [dict(tc) for tc in message.tool_calls]
            if not self.return_id:
                for tool_call in parsed_tool_calls:
                    tool_call.pop("id", None)
        else:
            try:
                # This exists purely for backward compatibility / cached messages
                # All new messages should use `message.tool_calls`
                raw_tool_calls = copy.deepcopy(message.additional_kwargs["tool_calls"])
            except KeyError:
                return None if self.first_tool_only else []
            parsed_tool_calls = parse_tool_calls(
                raw_tool_calls,
                partial=partial,
                strict=self.strict,
                return_id=self.return_id,
            )
        # For backwards compatibility
        for tc in parsed_tool_calls:
            tc["type"] = tc.pop("name")

        if self.first_tool_only:
            single_result = next(
                (tc for tc in parsed_tool_calls if tc["type"] == self.key_name),
                None,
            )
            if self.return_id:
                return single_result
            if single_result:
                return single_result["args"]
            return None

        matching = [tc for tc in parsed_tool_calls if tc["type"] == self.key_name]
        if self.return_id:
            return matching
        return [tc["args"] for tc in matching]


# Common cause of ValidationError is truncated output due to max_tokens.
_MAX_TOKENS_ERROR = (
    "Output parser received a `max_tokens` stop reason. "
    "The output is likely incomplete—please increase `max_tokens` "
    "or shorten your prompt."
)


class PydanticToolsParser(JsonOutputToolsParser):
    """Parse tools from OpenAI response."""

    tools: Annotated[list[TypeBaseModel], SkipValidation()]
    """The tools to parse."""

    # TODO: Support more granular streaming of objects.
    # Currently only streams once all Pydantic object fields are present.
    def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any:
        """Parse the result of an LLM call to a list of Pydantic objects.

        Args:
            result: The result of the LLM call.
            partial: Whether to parse partial JSON.

                If `True`, the output will be a JSON object containing all the keys that
                have been returned so far.

                If `False`, the output will be the full JSON object.

        Returns:
            The parsed Pydantic objects. If `first_tool_only` is `True`, a single
            object or `None` is returned instead of a list.

        Raises:
            ValueError: If the tool call arguments are not a dict.
            ValidationError: If the tool call arguments do not conform to the Pydantic
                model.
            OutputParserException: If a tool call names a tool that is not in
                `tools`.
        """
        json_results = super().parse_result(result, partial=partial)
        if not json_results:
            return None if self.first_tool_only else []

        # With `first_tool_only` the parent returns a single dict; normalize to a
        # list so both modes share the loop below.
        json_results = [json_results] if self.first_tool_only else json_results
        name_dict_v2: dict[str, TypeBaseModel] = {
            tool.model_config.get("title") or tool.__name__: tool
            for tool in self.tools
            if issubclass(tool, BaseModel)
        }
        name_dict_v1: dict[str, TypeBaseModel] = {
            tool.__name__: tool for tool in self.tools if issubclass(tool, BaseModelV1)
        }
        name_dict: dict[str, TypeBaseModel] = {**name_dict_v2, **name_dict_v1}
        pydantic_objects = []
        for res in json_results:
            if not isinstance(res["args"], dict):
                if partial:
                    continue
                msg = (
                    f"Tool arguments must be specified as a dict, received: "
                    f"{res['args']}"
                )
                raise ValueError(msg)

            try:
                tool = name_dict[res["type"]]
            except KeyError as e:
                available = ", ".join(name_dict.keys()) or "<no_tools>"
                msg = (
                    f"Unknown tool type: {res['type']!r}. Available tools: {available}"
                )
                raise OutputParserException(msg) from e

            try:
                pydantic_objects.append(tool(**res["args"]))
            except (ValidationError, ValueError):
                # While streaming, incomplete arguments are expected; skip them
                # until enough fields are present to validate.
                if partial:
                    continue
                has_max_tokens_stop_reason = any(
                    generation.message.response_metadata.get("stop_reason")
                    == "max_tokens"
                    for generation in result
                    if isinstance(generation, ChatGeneration)
                )
                if has_max_tokens_stop_reason:
                    logger.exception(_MAX_TOKENS_ERROR)
                raise
        if self.first_tool_only:
            return pydantic_objects[0] if pydantic_objects else None
        return pydantic_objects