libs/core/langchain_core/output_parsers/base.py · langchain-ai/langchain

"""Base parser for language model outputs."""

from __future__ import annotations

# `builtins.dict` is spelled out in annotations below because `BaseOutputParser`
# defines a method named `dict`, which shadows the builtin inside the class body.
import builtins
import contextlib
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Generic,
    TypeVar,
    cast,
)

from typing_extensions import override

from langchain_core._api import deprecated
from langchain_core.language_models import LanguageModelOutput
from langchain_core.messages import AnyMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.runnables import Runnable, RunnableConfig, RunnableSerializable
from langchain_core.runnables.config import run_in_executor

if TYPE_CHECKING:
    from langchain_core.prompt_values import PromptValue

T = TypeVar("T")
OutputParserLike = Runnable[LanguageModelOutput, T]


class BaseLLMOutputParser(ABC, Generic[T]):
    """Abstract base class for parsing the outputs of a model."""

    @abstractmethod
    def parse_result(self, result: list[Generation], *, partial: bool = False) -> T:
        """Parse a list of candidate model `Generation` objects into a specific format.

        Args:
            result: A list of `Generation` to be parsed.

                The `Generation` objects are assumed to be different candidate outputs
                for a single model input.
            partial: Whether to parse the output as a partial result.

                This is useful for parsers that can parse partial results.

        Returns:
            Structured output.
        """

    async def aparse_result(
        self, result: list[Generation], *, partial: bool = False
    ) -> T:
        """Async parse a list of candidate model `Generation` objects.

        The default implementation delegates to the synchronous `parse_result`
        through `run_in_executor`.

        Args:
            result: A list of `Generation` to be parsed.

                The `Generation` objects are assumed to be different candidate outputs
                for a single model input.
            partial: Whether to parse the output as a partial result.

                This is useful for parsers that can parse partial results.

        Returns:
            Structured output.
        """
        return await run_in_executor(None, self.parse_result, result, partial=partial)


class BaseGenerationOutputParser(
    BaseLLMOutputParser[T], RunnableSerializable[LanguageModelOutput, T]
):
    """Base class to parse the output of an LLM call."""

    @property
    @override
    def InputType(self) -> Any:
        """Return the input type for the parser."""
        return str | AnyMessage

    @property
    @override
    def OutputType(self) -> type[T]:
        """Return the output type for the parser."""
        # even though mypy complains this isn't valid,
        # it is good enough for pydantic to build the schema from
        return cast("type[T]", T)  # type: ignore[misc]

    @override
    def invoke(
        self,
        input: str | BaseMessage,
        config: RunnableConfig | None = None,
        **kwargs: Any,
    ) -> T:
        """Parse a single string or message by routing it through `parse_result`.

        A `BaseMessage` is wrapped in a `ChatGeneration`; any other input is
        wrapped in a `Generation` as its text. The parse call is executed via
        `_call_with_config` with `run_type="parser"`.

        Args:
            input: The model output to parse.
            config: Optional runnable config forwarded to `_call_with_config`.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Structured output.
        """
        if isinstance(input, BaseMessage):
            return self._call_with_config(
                lambda inner_input: self.parse_result(
                    [ChatGeneration(message=inner_input)]
                ),
                input,
                config,
                run_type="parser",
            )
        return self._call_with_config(
            lambda inner_input: self.parse_result([Generation(text=inner_input)]),
            input,
            config,
            run_type="parser",
        )

    @override
    async def ainvoke(
        self,
        input: str | BaseMessage,
        config: RunnableConfig | None = None,
        **kwargs: Any,
    ) -> T:
        """Async counterpart of `invoke`, routing through `aparse_result`.

        A `BaseMessage` is wrapped in a `ChatGeneration`; any other input is
        wrapped in a `Generation` as its text. The parse call is executed via
        `_acall_with_config` with `run_type="parser"`.

        Args:
            input: The model output to parse.
            config: Optional runnable config forwarded to `_acall_with_config`.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Structured output.
        """
        if isinstance(input, BaseMessage):
            return await self._acall_with_config(
                lambda inner_input: self.aparse_result(
                    [ChatGeneration(message=inner_input)]
                ),
                input,
                config,
                run_type="parser",
            )
        return await self._acall_with_config(
            lambda inner_input: self.aparse_result([Generation(text=inner_input)]),
            input,
            config,
            run_type="parser",
        )


class BaseOutputParser(
    BaseLLMOutputParser[T], RunnableSerializable[LanguageModelOutput, T]
):
    """Base class to parse the output of an LLM call.

    Output parsers help structure language model responses.

    Example:
        ```python
        # Implement a simple boolean output parser
        from langchain_core.exceptions import OutputParserException
        from langchain_core.output_parsers import BaseOutputParser


        class BooleanOutputParser(BaseOutputParser[bool]):
            true_val: str = "YES"
            false_val: str = "NO"

            def parse(self, text: str) -> bool:
                cleaned_text = text.strip().upper()
                if cleaned_text not in (
                    self.true_val.upper(),
                    self.false_val.upper(),
                ):
                    raise OutputParserException(
                        f"BooleanOutputParser expected output value to either be "
                        f"{self.true_val} or {self.false_val} (case-insensitive). "
                        f"Received {cleaned_text}."
                    )
                return cleaned_text == self.true_val.upper()

            @property
            def _type(self) -> str:
                return "boolean_output_parser"
        ```
    """

    @property
    @override
    def InputType(self) -> Any:
        """Return the input type for the parser."""
        return str | AnyMessage

    @property
    @override
    def OutputType(self) -> type[T]:
        """Return the output type for the parser.

        This property is inferred from the first type argument of the class.

        Raises:
            TypeError: If the class doesn't have an inferable `OutputType`.
        """
        # Walk the MRO and take the first class that carries non-empty pydantic
        # generic args; its first arg is treated as the output type.
        for base in self.__class__.mro():
            if hasattr(base, "__pydantic_generic_metadata__"):
                metadata = base.__pydantic_generic_metadata__
                if "args" in metadata and len(metadata["args"]) > 0:
                    return cast("type[T]", metadata["args"][0])

        msg = (
            f"Runnable {self.__class__.__name__} doesn't have an inferable OutputType. "
            "Override the OutputType property to specify the output type."
        )
        raise TypeError(msg)

    @override
    def invoke(
        self,
        input: str | BaseMessage,
        config: RunnableConfig | None = None,
        **kwargs: Any,
    ) -> T:
        """Parse a single string or message by routing it through `parse_result`.

        A `BaseMessage` is wrapped in a `ChatGeneration`; any other input is
        wrapped in a `Generation` as its text. The parse call is executed via
        `_call_with_config` with `run_type="parser"`.

        Args:
            input: The model output to parse.
            config: Optional runnable config forwarded to `_call_with_config`.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Structured output.
        """
        if isinstance(input, BaseMessage):
            return self._call_with_config(
                lambda inner_input: self.parse_result(
                    [ChatGeneration(message=inner_input)]
                ),
                input,
                config,
                run_type="parser",
            )
        return self._call_with_config(
            lambda inner_input: self.parse_result([Generation(text=inner_input)]),
            input,
            config,
            run_type="parser",
        )

    @override
    async def ainvoke(
        self,
        input: str | BaseMessage,
        config: RunnableConfig | None = None,
        **kwargs: Any,
    ) -> T:
        """Async counterpart of `invoke`, routing through `aparse_result`.

        A `BaseMessage` is wrapped in a `ChatGeneration`; any other input is
        wrapped in a `Generation` as its text. The parse call is executed via
        `_acall_with_config` with `run_type="parser"`.

        Args:
            input: The model output to parse.
            config: Optional runnable config forwarded to `_acall_with_config`.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Structured output.
        """
        if isinstance(input, BaseMessage):
            return await self._acall_with_config(
                lambda inner_input: self.aparse_result(
                    [ChatGeneration(message=inner_input)]
                ),
                input,
                config,
                run_type="parser",
            )
        return await self._acall_with_config(
            lambda inner_input: self.aparse_result([Generation(text=inner_input)]),
            input,
            config,
            run_type="parser",
        )

    @override
    def parse_result(self, result: list[Generation], *, partial: bool = False) -> T:
        """Parse a list of candidate model `Generation` objects into a specific format.

        The return value is parsed from only the first `Generation` in the result, which
        is assumed to be the highest-likelihood `Generation`.

        Args:
            result: A list of `Generation` to be parsed.

                The `Generation` objects are assumed to be different candidate outputs
                for a single model input.
            partial: Whether to parse the output as a partial result.

                This is useful for parsers that can parse partial results. This
                default implementation does not use the flag; it always calls
                `parse` on the first candidate's text.

        Returns:
            Structured output.

        Raises:
            IndexError: If `result` is empty.
        """
        return self.parse(result[0].text)

    @abstractmethod
    def parse(self, text: str) -> T:
        """Parse a single string model output into some structure.

        Args:
            text: String output of a language model.

        Returns:
            Structured output.
        """

    @override
    async def aparse_result(
        self, result: list[Generation], *, partial: bool = False
    ) -> T:
        """Async parse a list of candidate model `Generation` objects.

        The return value is parsed from only the first `Generation` in the result, which
        is assumed to be the highest-likelihood `Generation`. The work is delegated to
        the synchronous `parse_result` through `run_in_executor`.

        Args:
            result: A list of `Generation` to be parsed.

                The `Generation` objects are assumed to be different candidate outputs
                for a single model input.
            partial: Whether to parse the output as a partial result.

                This is useful for parsers that can parse partial results.

        Returns:
            Structured output.
        """
        return await run_in_executor(None, self.parse_result, result, partial=partial)

    async def aparse(self, text: str) -> T:
        """Async parse a single string model output into some structure.

        Delegates to the synchronous `parse` through `run_in_executor`.

        Args:
            text: String output of a language model.

        Returns:
            Structured output.
        """
        return await run_in_executor(None, self.parse, text)

    # TODO: rename 'completion' -> 'text'.
    def parse_with_prompt(
        self,
        completion: str,
        prompt: PromptValue,  # noqa: ARG002
    ) -> Any:
        """Parse the output of an LLM call with the input prompt for context.

        The prompt is largely provided in the event the `OutputParser` wants to retry or
        fix the output in some way, and needs information from the prompt to do so.
        This default implementation ignores `prompt` and simply calls `parse`.

        Args:
            completion: String output of a language model.
            prompt: Input `PromptValue`.

        Returns:
            Structured output.
        """
        return self.parse(completion)

    def get_format_instructions(self) -> str:
        """Instructions on how the LLM output should be formatted.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    @property
    def _type(self) -> str:
        """Return the output parser type for serialization.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        msg = (
            f"_type property is not implemented in class {self.__class__.__name__}."
            " This is required for serialization."
        )
        raise NotImplementedError(msg)

    @deprecated("1.4.2", alternative="asdict", removal="2.0.0")
    @override
    def dict(self, **kwargs: Any) -> builtins.dict[str, Any]:
        """DEPRECATED - use `asdict()` instead.

        Return a dictionary representation of the output parser.
        """
        return self.asdict(**kwargs)

    def asdict(self, **kwargs: Any) -> builtins.dict[str, Any]:
        """Return a dictionary representation of the output parser.

        The result of `model_dump(**kwargs)` is extended with a `"_type"` entry
        when the subclass implements the `_type` property.

        Args:
            **kwargs: Keyword arguments forwarded to `model_dump`.

        Returns:
            The dumped fields, plus `"_type"` when available.
        """
        output_parser_dict = super().model_dump(**kwargs)
        # `_type` raises NotImplementedError by default; in that case the key is
        # simply omitted rather than failing the dump.
        with contextlib.suppress(NotImplementedError):
            output_parser_dict["_type"] = self._type
        return output_parser_dict