libs/standard-tests/langchain_tests/integration_tests/chat_models.py · langchain-ai/langchain

1"""Integration tests for chat models."""23from __future__ import annotations45import base646import json7import os8import warnings9from typing import TYPE_CHECKING, Annotated, Any, Literal, cast10from unittest.mock import MagicMock1112import httpx13import pytest14from langchain_core.callbacks import BaseCallbackHandler15from langchain_core.language_models import BaseChatModel, GenericFakeChatModel16from langchain_core.language_models.chat_model_stream import (17    AsyncChatModelStream,18    ChatModelStream,19)20from langchain_core.messages import (21    AIMessage,22    AIMessageChunk,23    BaseMessage,24    HumanMessage,25    SystemMessage,26    ToolMessage,27)28from langchain_core.output_parsers import StrOutputParser29from langchain_core.prompts import ChatPromptTemplate30from langchain_core.tools import BaseTool, tool31from langchain_core.utils.function_calling import (32    convert_to_json_schema,33    tool_example_to_messages,34)35from pydantic import BaseModel, Field36from pydantic.v1 import BaseModel as BaseModelV137from pydantic.v1 import Field as FieldV138from typing_extensions import TypedDict, override3940from langchain_tests.unit_tests.chat_models import ChatModelTests41from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION42from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream4344if TYPE_CHECKING:45    from pytest_benchmark.fixture import (46        BenchmarkFixture,47    )48    from vcr.cassette import Cassette495051def _get_joke_class(  # noqa: RET50352    schema_type: Literal["pydantic", "typeddict", "json_schema"],53) -> Any:54    class Joke(BaseModel):55        """Joke to tell user."""5657        setup: str = Field(description="question to set up a joke")58        punchline: str = Field(description="answer to resolve the joke")5960    def validate_joke(result: Any) -> bool:61        return isinstance(result, Joke)6263    class JokeDict(TypedDict):64        """Joke to tell user."""6566        setup: Annotated[str, ..., "question to set up a joke"]67        punchline: Annotated[str, ..., "answer to resolve the joke"]6869    def validate_joke_dict(result: Any) -> bool:70        return all(key in {"setup", "punchline"} for key in result)7172    if schema_type == "pydantic":73        return Joke, validate_joke7475    if schema_type == "typeddict":76        return JokeDict, validate_joke_dict7778    if schema_type == "json_schema":79        return Joke.model_json_schema(), validate_joke_dict808182class _TestCallbackHandler(BaseCallbackHandler):83    options: list[dict[str, Any] | None]8485    def __init__(self) -> None:86        super().__init__()87        self.options = []8889    @override90    def on_chat_model_start(91        self,92        serialized: Any,93        messages: Any,94        *,95        options: dict[str, Any] | None = None,96        **kwargs: Any,97    ) -> None:98        self.options.append(options)99100101class _MagicFunctionSchema(BaseModel):102    input: int = Field(..., gt=-1000, lt=1000)103104105@tool(args_schema=_MagicFunctionSchema)106def magic_function(_input: int) -> int:107    """Apply a magic function to an input."""108    return _input + 2109110111@tool112def magic_function_no_args() -> int:113    """Calculate a magic function."""114    return 5115116117def _validate_tool_call_message(message: BaseMessage) -> None:118    assert isinstance(message, AIMessage)119    assert len(message.tool_calls) == 1120121    tool_call = message.tool_calls[0]122    assert tool_call["name"] == "magic_function"123    assert tool_call["args"] == {"input": 3}124    assert tool_call["id"] is not None125    assert tool_call.get("type") == "tool_call"126127    content_tool_calls = [128        block for block in message.content_blocks if block["type"] == "tool_call"129    ]130    assert len(content_tool_calls) == 1131    content_tool_call = content_tool_calls[0]132    assert content_tool_call["name"] == "magic_function"133    assert content_tool_call["args"] == {"input": 3}134    assert content_tool_call["id"] is not None135136137def _validate_tool_call_chunk(chunk: AIMessageChunk) -> bool:138    """Check whether a streaming chunk contains valid `tool_call_chunk` blocks.139140    Returns:141        `True` if at least one `tool_call_chunk` block was found.142    """143    found = False144    for block in chunk.content_blocks:145        if block.get("type") == "tool_call_chunk":146            found = True147            assert "name" in block, "tool_call_chunk block missing 'name' field"148            assert "args" in block, "tool_call_chunk block missing 'args' field"149            assert "id" in block, "tool_call_chunk block missing 'id' field"150    return found151152153def _validate_tool_call_message_no_args(message: BaseMessage) -> None:154    assert isinstance(message, AIMessage)155    assert len(message.tool_calls) == 1156157    tool_call = message.tool_calls[0]158    assert tool_call["name"] == "magic_function_no_args"159    assert tool_call["args"] == {}160    assert tool_call["id"] is not None161    assert tool_call.get("type") == "tool_call"162163164def _get_base64_from_url(url: str) -> str:165    user_agent = os.environ.get("LANGCHAIN_TESTS_USER_AGENT")166    if not user_agent:167        warning_message = (168            "LANGCHAIN_TESTS_USER_AGENT environment variable not set. "169            "langchain-tests pulls (CC0 License) audio data from wikimedia.org. "170            "Consider setting a user agent to identify your requests. See "171            "https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy"172        )173        warnings.warn(warning_message, stacklevel=2)174    headers = {"User-Agent": user_agent} if user_agent else {}175    httpx_response = httpx.get(url, headers=headers, timeout=10.0).content176    return base64.b64encode(httpx_response).decode("utf-8")177178179@tool180def unicode_customer(customer_name: str, description: str) -> str:181    """Tool for creating a customer with Unicode name.182183    Args:184        customer_name: The customer's name in their native language.185        description: Description of the customer.186187    Returns:188        A confirmation message about the customer creation.189190    """191    return f"Created customer: {customer_name} - {description}"192193194class ChatModelIntegrationTests(ChatModelTests):195    '''Base class for chat model integration tests.196197    Test subclasses must implement the `chat_model_class` and198    `chat_model_params` properties to specify what model to test and its199    initialization parameters.200201    ```python202    from typing import Type203204    from langchain_tests.integration_tests import ChatModelIntegrationTests205    from my_package.chat_models import MyChatModel206207208    class TestMyChatModelIntegration(ChatModelIntegrationTests):209        @property210        def chat_model_class(self) -> Type[MyChatModel]:211            # Return the chat model class to test here212            return MyChatModel213214        @property215        def chat_model_params(self) -> dict:216            # Return initialization parameters for the model.217            return {"model": "model-001", "temperature": 0}218    ```219220    !!! note221        API references for individual test methods include troubleshooting tips.222223224    Test subclasses **must** implement the following two properties:225226    `chat_model_class`: The chat model class to test, e.g., `ChatParrotLink`.227228    ```python229    @property230    def chat_model_class(self) -> Type[ChatParrotLink]:231        return ChatParrotLink232    ```233234    `chat_model_params`: Initialization parameters for the chat model.235236    ```python237    @property238    def chat_model_params(self) -> dict:239        return {"model": "bird-brain-001", "temperature": 0}240    ```241242    In addition, test subclasses can control what features are tested (such as tool243    calling or multi-modality) by selectively overriding the following properties.244245    Expand to see details:246247    ???+ info "`has_tool_calling`"248249        Boolean property indicating whether the chat model supports tool calling.250251        By default, this is determined by whether the chat model's `bind_tools` method252        is overridden. It typically does not need to be overridden on the test class.253254        ```python255        @property256        def has_tool_calling(self) -> bool:257            return True258        ```259260    ??? info "`has_tool_choice`"261262        Boolean property indicating whether the chat model supports forcing tool263        calling via a `tool_choice` parameter.264265        By default, this is determined by whether the parameter is included in the266        signature for the corresponding `bind_tools` method.267268        If `True`, the minimum requirement for this feature is that269        `tool_choice='any'` will force a tool call, and `tool_choice=<tool name>`270        will force a call to a specific tool.271272        ```python273        @property274        def has_tool_choice(self) -> bool:275            return False276        ```277278    ??? info "`has_structured_output`"279280        Boolean property indicating whether the chat model supports structured281        output.282283        By default, this is determined by whether the chat model's284        `with_structured_output` method is overridden. If the base implementation is285        intended to be used, this method should be overridden.286287        See docs for [Structured output](https://docs.langchain.com/oss/python/langchain/structured-output).288289        ```python290        @property291        def has_structured_output(self) -> bool:292            return True293        ```294295    ??? info "`structured_output_kwargs`"296297        Dict property specifying additional kwargs to pass to298        `with_structured_output()` when running structured output tests.299300        Override this to customize how your model generates structured output.301302        The most common use case is specifying the `method` parameter:303304        - `'function_calling'`: Uses tool/function calling to enforce the schema.305        - `'json_mode'`: Uses the model's JSON mode.306        - `'json_schema'`: Uses native JSON schema support (e.g., OpenAI's structured307            outputs).308309        ```python310        @property311        def structured_output_kwargs(self) -> dict:312            return {"method": "json_schema"}313        ```314315    ??? info "`supports_json_mode`"316317        Boolean property indicating whether the chat model supports318        `method='json_mode'` in `with_structured_output`.319320        Defaults to `False`.321322        JSON mode constrains the model to output valid JSON without enforcing323        a specific schema (unlike `'function_calling'` or `'json_schema'` methods).324325        When using JSON mode, you must prompt the model to output JSON in your326        message.327328        !!! example329330            ```python331            structured_llm = llm.with_structured_output(MySchema, method="json_mode")332            structured_llm.invoke("... Return the result as JSON.")333            ```334335        See docs for [Structured output](https://docs.langchain.com/oss/python/langchain/structured-output).336337        ```python338        @property339        def supports_json_mode(self) -> bool:340            return True341        ```342343    ??? info "`supports_image_inputs`"344345        Boolean property indicating whether the chat model supports image inputs.346347        Defaults to `False`.348349        If set to `True`, the chat model will be tested by inputting an350        `ImageContentBlock` with the shape:351352        ```python353        {354            "type": "image",355            "base64": "<base64 image data>",356            "mime_type": "image/jpeg",  # or appropriate MIME type357        }358        ```359360        In addition to OpenAI-style content blocks:361362        ```python363        {364            "type": "image_url",365            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},366        }367        ```368369        See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).370371        ```python372        @property373        def supports_image_inputs(self) -> bool:374            return True375        ```376377    ??? info "`supports_image_urls`"378379        Boolean property indicating whether the chat model supports image inputs from380        URLs.381382        Defaults to `False`.383384        If set to `True`, the chat model will be tested using content blocks of the385        form386387        ```python388        {389            "type": "image",390            "url": "https://...",391        }392        ```393394        See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).395396        ```python397        @property398        def supports_image_urls(self) -> bool:399            return True400        ```401402    ??? info "`supports_image_tool_message`"403404        Boolean property indicating whether the chat model supports a `ToolMessage`405        that includes image content, e.g. in the OpenAI Chat Completions format.406407        Defaults to `False`.408409        ```python410        ToolMessage(411            content=[412                {413                    "type": "image_url",414                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},415                },416            ],417            tool_call_id="1",418            name="random_image",419        )420        ```421422        ...as well as the LangChain `ImageContentBlock` format:423424        ```python425        ToolMessage(426            content=[427                {428                    "type": "image",429                    "base64": image_data,430                    "mime_type": "image/jpeg",431                },432            ],433            tool_call_id="1",434            name="random_image",435        )436        ```437438        If set to `True`, the chat model will be tested with message sequences that439        include `ToolMessage` objects of this form.440441        ```python442        @property443        def supports_image_tool_message(self) -> bool:444            return True445        ```446447    ??? info "`supports_pdf_inputs`"448449        Boolean property indicating whether the chat model supports PDF inputs.450451        Defaults to `False`.452453        If set to `True`, the chat model will be tested by inputting a454        `FileContentBlock` with the shape:455456        ```python457        {458            "type": "file",459            "base64": "<base64 file data>",460            "mime_type": "application/pdf",461        }462        ```463464        See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).465466        ```python467        @property468        def supports_pdf_inputs(self) -> bool:469            return True470        ```471472    ??? info "`supports_pdf_tool_message`"473474        Boolean property indicating whether the chat model supports a `ToolMessage`475        that includes PDF content using the LangChain `FileContentBlock` format.476477        Defaults to `False`.478479        ```python480        ToolMessage(481            content=[482                {483                    "type": "file",484                    "base64": pdf_data,485                    "mime_type": "application/pdf",486                },487            ],488            tool_call_id="1",489            name="random_pdf",490        )491        ```492493        If set to `True`, the chat model will be tested with message sequences that494        include `ToolMessage` objects of this form.495496        ```python497        @property498        def supports_pdf_tool_message(self) -> bool:499            return True500        ```501502    ??? info "`supports_audio_inputs`"503504        Boolean property indicating whether the chat model supports audio inputs.505506        Defaults to `False`.507508        If set to `True`, the chat model will be tested by inputting an509        `AudioContentBlock` with the shape:510511        ```python512        {513            "type": "audio",514            "base64": "<base64 audio data>",515            "mime_type": "audio/wav",  # or appropriate MIME type516        }517        ```518519        See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).520521        ```python522        @property523        def supports_audio_inputs(self) -> bool:524            return True525        ```526527        !!! warning528            This test downloads audio data from wikimedia.org. You may need to set the529            `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these tests,530            e.g.,531532            ```bash533            export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; coolbot@example.org) generic-library/0.0"534            ```535536            Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).537538    ??? info "`supports_video_inputs`"539540        Boolean property indicating whether the chat model supports image inputs.541542        Defaults to `False`.543544        No current tests are written for this feature.545546    ??? info "`returns_usage_metadata`"547548        Boolean property indicating whether the chat model returns usage metadata549        on invoke and streaming responses.550551        Defaults to `True`.552553        `usage_metadata` is an optional dict attribute on `AIMessage` objects that track554        input and output tokens.555556        [See more](https://reference.langchain.com/python/langchain_core/language_models/#langchain_core.messages.ai.UsageMetadata).557558        ```python559        @property560        def returns_usage_metadata(self) -> bool:561            return False562        ```563564        Models supporting `usage_metadata` should also return the name of the underlying565        model in the `response_metadata` of the `AIMessage`.566567    ??? info "`supports_anthropic_inputs`"568569        Boolean property indicating whether the chat model supports Anthropic-style570        inputs.571572        Defaults to `False`.573574        These inputs might feature "tool use" and "tool result" content blocks, e.g.,575576        ```python577        [578            {"type": "text", "text": "Hmm let me think about that"},579            {580                "type": "tool_use",581                "input": {"fav_color": "green"},582                "id": "foo",583                "name": "color_picker",584            },585        ]586        ```587588        If set to `True`, the chat model will be tested using content blocks of this589        form.590591        ```python592        @property593        def supports_anthropic_inputs(self) -> bool:594            return True595        ```596597    ??? info "`supported_usage_metadata_details`"598599        Property controlling what usage metadata details are emitted in both invoke600        and stream.601602        Defaults to `{"invoke": [], "stream": []}`.603604        `usage_metadata` is an optional dict attribute on `AIMessage` objects that track605        input and output tokens.606607        [See more](https://reference.langchain.com/python/langchain_core/language_models/#langchain_core.messages.ai.UsageMetadata).608609        It includes optional keys `input_token_details` and `output_token_details`610        that can track usage details associated with special types of tokens, such as611        cached, audio, or reasoning.612613        Only needs to be overridden if these details are supplied.614615    ??? info "`enable_vcr_tests`"616617        Property controlling whether to enable select tests that rely on618        [VCR](https://vcrpy.readthedocs.io/en/latest/) caching of HTTP calls, such619        as benchmarking tests.620621        Defaults to `False`.622623        To enable these tests, follow these steps:624625        1. Override the `enable_vcr_tests` property to return `True`:626627            ```python628            @property629            def enable_vcr_tests(self) -> bool:630                return True631            ```632633        2. Configure VCR to exclude sensitive headers and other information from634            cassettes.635636            !!! warning637                VCR will by default record authentication headers and other sensitive638                information in cassettes. Read below for how to configure what639                information is recorded in cassettes.640641            To add configuration to VCR, add a `conftest.py` file to the `tests/`642            directory and implement the `vcr_config` fixture there.643644            `langchain-tests` excludes the headers `'authorization'`,645            `'x-api-key'`, and `'api-key'` from VCR cassettes. To pick up this646            configuration, you will need to add `conftest.py` as shown below. You can647            also exclude additional headers, override the default exclusions, or apply648            other customizations to the VCR configuration. See example below:649650            ```python title="tests/conftest.py"651            import pytest652            from langchain_tests.conftest import base_vcr_config653654            _EXTRA_HEADERS = [655                # Specify additional headers to redact656                ("user-agent", "PLACEHOLDER"),657            ]658659660            def remove_response_headers(response: dict) -> dict:661                # If desired, remove or modify headers in the response.662                response["headers"] = {}663                return response664665666            @pytest.fixture(scope="session")667            def vcr_config() -> dict:668                """Extend the default configuration from langchain_tests."""669                config = base_vcr_config()670                config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)671                config["before_record_response"] = remove_response_headers672673                return config674            ```675676            ??? note "Compressing cassettes"677678                `langchain-tests` includes a custom VCR serializer that compresses679                cassettes using gzip. To use it, register the `yaml.gz` serializer680                to your VCR fixture and enable this serializer in the config. See681                example below:682683                ```python title="tests/conftest.py"684                import pytest685                from langchain_tests.conftest import (686                    CustomPersister,687                    CustomSerializer,688                )689                from langchain_tests.conftest import base_vcr_config690                from vcr import VCR691692                _EXTRA_HEADERS = [693                    # Specify additional headers to redact694                    ("user-agent", "PLACEHOLDER"),695                ]696697698                def remove_response_headers(response: dict) -> dict:699                    # If desired, remove or modify headers in the response.700                    response["headers"] = {}701                    return response702703704                @pytest.fixture(scope="session")705                def vcr_config() -> dict:706                    """Extend the default configuration from langchain_tests."""707                    config = base_vcr_config()708                    config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)709                    config["before_record_response"] = remove_response_headers710                    # New: enable serializer and set file extension711                    config["serializer"] = "yaml.gz"712                    config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")713714                    return config715716717                def pytest_recording_configure(config: dict, vcr: VCR) -> None:718                    vcr.register_persister(CustomPersister())719                    vcr.register_serializer("yaml.gz", CustomSerializer())720                ```721722                You can inspect the contents of the compressed cassettes (e.g., to723                ensure no sensitive information is recorded) using724725                ```bash726                gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz727                ```728729                ...or by using the serializer:730731                ```python732                from langchain_tests.conftest import (733                    CustomPersister,734                    CustomSerializer,735                )736737                cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz"738                requests, responses = CustomPersister().load_cassette(739                    path, CustomSerializer()740                )741                ```742743        3. Run tests to generate VCR cassettes.744745            ```bash title="Example"746            uv run python -m pytest tests/integration_tests/test_chat_models.py::TestMyModel::test_stream_time747            ```748749            This will generate a VCR cassette for the test in750            `tests/integration_tests/cassettes/`.751752            !!! warning753                You should inspect the generated cassette to ensure that it does not754                contain sensitive information. If it does, you can modify the755                `vcr_config` fixture to exclude headers or modify the response756                before it is recorded.757758            You can then commit the cassette to your repository. Subsequent test runs759            will use the cassette instead of making HTTP calls.760    '''  # noqa: E501761762    @override763    @property764    def standard_chat_model_params(self) -> dict[str, Any]:765        """Standard parameters for chat model."""766        return {}767768    def test_invoke(self, model: BaseChatModel) -> None:769        """Test to verify that `model.invoke(simple_message)` works.770771        This should pass for all integrations.772773        ??? question "Troubleshooting"774775            If this test fails, you should make sure your `_generate` method776            does not raise any exceptions, and that it returns a valid777            `langchain_core.outputs.chat_result.ChatResult` like so:778779            ```python780            return ChatResult(781                generations=[ChatGeneration(message=AIMessage(content="Output text"))]782            )783            ```784785        """786        result = model.invoke("Hello")787        assert result is not None788        assert isinstance(result, AIMessage)789        assert isinstance(result.text, str)790        assert len(result.content) > 0791792    async def test_ainvoke(self, model: BaseChatModel) -> None:793        """Test to verify that `await model.ainvoke(simple_message)` works.794795        This should pass for all integrations. Passing this test does not indicate796        a "natively async" implementation, but rather that the model can be used797        in an async context.798799        ??? question "Troubleshooting"800801            First, debug802            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.803            because `ainvoke` has a default implementation that calls `invoke` in an804            async context.805806            If that test passes but not this one, you should make sure your `_agenerate`807            method does not raise any exceptions, and that it returns a valid808            `langchain_core.outputs.chat_result.ChatResult` like so:809810            ```python811            return ChatResult(812                generations=[ChatGeneration(message=AIMessage(content="Output text"))]813            )814            ```815        """816        result = await model.ainvoke("Hello")817        assert result is not None818        assert isinstance(result, AIMessage)819        assert isinstance(result.text, str)820        assert len(result.content) > 0821822    @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)823    def test_stream(self, model: BaseChatModel) -> None:824        """Test to verify that `model.stream(simple_message)` works.825826        This should pass for all integrations. Passing this test does not indicate827        a "streaming" implementation, but rather that the model can be used in a828        streaming context.829830        ??? question "Troubleshooting"831832            First, debug833            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.834            because `stream` has a default implementation that calls `invoke` and835            yields the result as a single chunk.836837            If that test passes but not this one, you should make sure your `_stream`838            method does not raise any exceptions, and that it yields valid839            `langchain_core.outputs.chat_generation.ChatGenerationChunk`840            objects like so:841842            ```python843            yield ChatGenerationChunk(message=AIMessageChunk(content="chunk text"))844            ```845846            The final chunk must have `chunk_position='last'` to signal stream847            completion. This enables proper parsing of `tool_call_chunks` into848            `tool_calls` on the aggregated message:849850            ```python851            for i, token in enumerate(tokens):852                is_last = i == len(tokens) - 1853                yield ChatGenerationChunk(854                    message=AIMessageChunk(855                        content=token,856                        chunk_position="last" if is_last else None,857                    )858                )859            ```860        """861        chunks: list[AIMessageChunk] = []862        full: AIMessageChunk | None = None863        for chunk in model.stream("Hello"):864            assert chunk is not None865            assert isinstance(chunk, AIMessageChunk)866            assert isinstance(chunk.content, str | list)867            chunks.append(chunk)868            full = chunk if full is None else full + chunk869        assert len(chunks) > 0870        assert isinstance(full, AIMessageChunk)871        assert full.content872        assert full.text873        # Exactly one text block — guards against merge bugs that would produce874        # multiple adjacent text blocks in the aggregated result.875        text_blocks = [b for b in full.content_blocks if b["type"] == "text"]876        assert len(text_blocks) == 1877878        # Verify chunk_position signaling879        last_chunk = chunks[-1]880        assert last_chunk.chunk_position == "last", (881            f"Final chunk must have chunk_position='last', "882            f"got {last_chunk.chunk_position!r}"883        )884885    @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)886    async def test_astream(self, model: BaseChatModel) -> None:887        """Test to verify that `await model.astream(simple_message)` works.888889        This should pass for all integrations. Passing this test does not indicate890        a "natively async" or "streaming" implementation, but rather that the model can891        be used in an async streaming context.892893        ??? question "Troubleshooting"894895            First, debug896            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`.897            and898            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`.899            because `astream` has a default implementation that calls `_stream` in900            an async context if it is implemented, or `ainvoke` and yields the result901            as a single chunk if not.902903            If those tests pass but not this one, you should make sure your `_astream`904            method does not raise any exceptions, and that it yields valid905            `langchain_core.outputs.chat_generation.ChatGenerationChunk`906            objects like so:907908            ```python909            yield ChatGenerationChunk(message=AIMessageChunk(content="chunk text"))910            ```911912            See `test_stream` troubleshooting for `chunk_position` requirements.913        """914        chunks: list[AIMessageChunk] = []915        full: AIMessageChunk | None = None916        async for chunk in model.astream("Hello"):917            assert chunk is not None918            assert isinstance(chunk, AIMessageChunk)919            assert isinstance(chunk.content, str | list)920            chunks.append(chunk)921            full = chunk if full is None else full + chunk922        assert len(chunks) > 0923        assert isinstance(full, AIMessageChunk)924        assert full.content925        assert full.text926        # Exactly one text block — guards against merge bugs that would produce927        # multiple adjacent text blocks in the aggregated result.928        text_blocks = [b for b in full.content_blocks if b["type"] == "text"]929        assert len(text_blocks) == 1930931        # Verify chunk_position signaling932        last_chunk = chunks[-1]933        assert last_chunk.chunk_position == "last", (934            f"Final chunk must have chunk_position='last', "935            f"got {last_chunk.chunk_position!r}"936        )937938    def test_stream_events_v3(self, model: BaseChatModel) -> None:939        """Test that `model.stream_events("Hello", version="v3")` works.940941        Exercises the content-block-centric streaming protocol. Passing this942        test indicates the model participates in `stream_events(version="v3")` either943        natively (via `_stream_chat_model_events`) or through the compat bridge that944        converts `_stream` chunks into protocol events.945946        ??? question "Troubleshooting"947948            First, debug949            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`950            — `stream_events(version="v3")` falls back to the same951            `_stream` path via the compat bridge when the model does not952            implement953            `_stream_chat_model_events`. If `test_stream` passes but this does954            not, inspect the raised lifecycle violation: it identifies the955            event index and the rule broken.956        """957        stream = model.stream_events("Hello", version="v3")958        assert isinstance(stream, ChatModelStream)959960        events = list(stream)961        assert len(events) > 0962        assert_valid_event_stream(events)963964        message = stream.output965        assert isinstance(message, AIMessage)966        assert message.content967        assert message.text968        assert any(block["type"] == "text" for block in message.content_blocks)969        # `stream_events(version="v3")` always assembles content as v1 protocol blocks.970        assert message.response_metadata.get("output_version") == "v1"971972    async def test_astream_events_v3(self, model: BaseChatModel) -> None:973        """Test that `await model.astream_events("Hello", version="v3")` works.974975        Async counterpart to `test_stream_events_v3`. Exercises the976        `AsyncChatModelStream` path end-to-end: the background producer task,977        replay-buffer-backed event iteration, and the awaitable `output`978        projection.979980        ??? question "Troubleshooting"981982            First, debug983            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_astream`.984            If `test_astream` passes but this does not, inspect the raised985            lifecycle violation; it identifies the event index and the rule986            broken.987        """988        stream = await model.astream_events("Hello", version="v3")989        assert isinstance(stream, AsyncChatModelStream)990991        events = [event async for event in stream]992        assert len(events) > 0993        assert_valid_event_stream(events)994995        message = await stream.output996        assert isinstance(message, AIMessage)997        assert message.content998        assert message.text999        assert any(block["type"] == "text" for block in message.content_blocks)1000        assert message.response_metadata.get("output_version") == "v1"10011002    def test_invoke_with_model_override(self, model: BaseChatModel) -> None:1003        """Test that model name can be overridden at invoke time via kwargs.10041005        This enables dynamic model selection without creating new instances,1006        which is useful for fallback strategies, A/B testing, or cost optimization.10071008        Test is skipped if `supports_model_override` is `False`.10091010        ??? question "Troubleshooting"10111012            If this test fails, ensure that your `_generate` method passes1013            `**kwargs` through to the API request payload in a way that allows1014            the `model` parameter to be overridden.10151016            For example:1017            ```python1018            def _get_request_payload(self, ..., **kwargs) -> dict:1019                return {1020                    "model": self.model,1021                    ...1022                    **kwargs,  # kwargs should come last to allow overrides1023                }1024            ```1025        """1026        if not self.supports_model_override:1027            pytest.skip("Model override not supported.")10281029        override_model = self.model_override_value1030        if not override_model:1031            pytest.skip("model_override_value not specified.")10321033        result = model.invoke("Hello", model=override_model)1034        assert result is not None1035        assert isinstance(result, AIMessage)10361037        # Verify the overridden model was used1038        model_name = result.response_metadata.get("model_name")1039        assert model_name is not None, "model_name not found in response_metadata"1040        assert override_model in model_name, (1041            f"Expected model '{override_model}' but got '{model_name}'"1042        )10431044    async def test_ainvoke_with_model_override(self, model: BaseChatModel) -> None:1045        """Test that model name can be overridden at ainvoke time via kwargs.10461047        Test is skipped if `supports_model_override` is `False`.10481049        ??? question "Troubleshooting"10501051            See troubleshooting for `test_invoke_with_model_override`.1052        """1053        if not self.supports_model_override:1054            pytest.skip("Model override not supported.")10551056        override_model = self.model_override_value1057        if not override_model:1058            pytest.skip("model_override_value not specified.")10591060        result = await model.ainvoke("Hello", model=override_model)1061        assert result is not None1062        assert isinstance(result, AIMessage)10631064        # Verify the overridden model was used1065        model_name = result.response_metadata.get("model_name")1066        assert model_name is not None, "model_name not found in response_metadata"1067        assert override_model in model_name, (1068            f"Expected model '{override_model}' but got '{model_name}'"1069        )10701071    def test_stream_with_model_override(self, model: BaseChatModel) -> None:1072        """Test that model name can be overridden at stream time via kwargs.10731074        Test is skipped if `supports_model_override` is `False`.10751076        ??? question "Troubleshooting"10771078            See troubleshooting for `test_invoke_with_model_override`.1079        """1080        if not self.supports_model_override:1081            pytest.skip("Model override not supported.")10821083        override_model = self.model_override_value1084        if not override_model:1085            pytest.skip("model_override_value not specified.")10861087        full: AIMessageChunk | None = None1088        for chunk in model.stream("Hello", model=override_model):1089            assert isinstance(chunk, AIMessageChunk)1090            full = chunk if full is None else full + chunk10911092        assert full is not None10931094        # Verify the overridden model was used1095        model_name = full.response_metadata.get("model_name")1096        assert model_name is not None, "model_name not found in response_metadata"1097        assert override_model in model_name, (1098            f"Expected model '{override_model}' but got '{model_name}'"1099        )11001101    async def test_astream_with_model_override(self, model: BaseChatModel) -> None:1102        """Test that model name can be overridden at astream time via kwargs.11031104        Test is skipped if `supports_model_override` is `False`.11051106        ??? question "Troubleshooting"11071108            See troubleshooting for `test_invoke_with_model_override`.1109        """1110        if not self.supports_model_override:1111            pytest.skip("Model override not supported.")11121113        override_model = self.model_override_value1114        if not override_model:1115            pytest.skip("model_override_value not specified.")11161117        full: AIMessageChunk | None = None1118        async for chunk in model.astream("Hello", model=override_model):1119            assert isinstance(chunk, AIMessageChunk)1120            full = chunk if full is None else full + chunk11211122        assert full is not None11231124        # Verify the overridden model was used1125        model_name = full.response_metadata.get("model_name")1126        assert model_name is not None, "model_name not found in response_metadata"1127        assert override_model in model_name, (1128            f"Expected model '{override_model}' but got '{model_name}'"1129        )11301131    def test_batch(self, model: BaseChatModel) -> None:1132        """Test to verify that `model.batch([messages])` works.11331134        This should pass for all integrations. Tests the model's ability to process1135        multiple prompts in a single batch.11361137        ??? question "Troubleshooting"11381139            First, debug1140            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1141            because `batch` has a default implementation that calls `invoke` for1142            each message in the batch.11431144            If that test passes but not this one, you should make sure your `batch`1145            method does not raise any exceptions, and that it returns a list of valid1146            `AIMessage` objects.11471148        """1149        batch_results = model.batch(["Hello", "Hey"])1150        assert batch_results is not None1151        assert isinstance(batch_results, list)1152        assert len(batch_results) == 21153        for result in batch_results:1154            assert result is not None1155            assert isinstance(result, AIMessage)1156            assert isinstance(result.text, str)1157            assert len(result.content) > 011581159    async def test_abatch(self, model: BaseChatModel) -> None:1160        """Test to verify that `await model.abatch([messages])` works.11611162        This should pass for all integrations. Tests the model's ability to process1163        multiple prompts in a single batch asynchronously.11641165        ??? question "Troubleshooting"11661167            First, debug1168            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_batch`1169            and1170            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`1171            because `abatch` has a default implementation that calls `ainvoke` for1172            each message in the batch.11731174            If those tests pass but not this one, you should make sure your `abatch`1175            method does not raise any exceptions, and that it returns a list of valid1176            `AIMessage` objects.11771178        """1179        batch_results = await model.abatch(["Hello", "Hey"])1180        assert batch_results is not None1181        assert isinstance(batch_results, list)1182        assert len(batch_results) == 21183        for result in batch_results:1184            assert result is not None1185            assert isinstance(result, AIMessage)1186            assert isinstance(result.text, str)1187            assert len(result.content) > 011881189    def test_conversation(self, model: BaseChatModel) -> None:1190        """Test to verify that the model can handle multi-turn conversations.11911192        This should pass for all integrations. Tests the model's ability to process1193        a sequence of alternating `HumanMessage` and `AIMessage` objects as context for1194        generating the next response.11951196        ??? question "Troubleshooting"11971198            First, debug1199            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1200            because this test also uses `model.invoke`.12011202            If that test passes but not this one, you should verify that:12031204            1. Your model correctly processes the message history1205            2. The model maintains appropriate context from previous messages1206            3. The response is a valid `langchain_core.messages.AIMessage`12071208        """1209        messages = [1210            HumanMessage("hello"),1211            AIMessage("hello"),1212            HumanMessage("how are you"),1213        ]12141215        result = model.invoke(messages)1216        assert result is not None1217        assert isinstance(result, AIMessage)1218        assert isinstance(result.text, str)1219        assert len(result.content) > 012201221    def test_double_messages_conversation(self, model: BaseChatModel) -> None:1222        """Test to verify that the model can handle double-message conversations.12231224        This should pass for all integrations. Tests the model's ability to process1225        a sequence of double-system, double-human, and double-ai messages as context1226        for generating the next response.12271228        ??? question "Troubleshooting"12291230            First, debug1231            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1232            because this test also uses `model.invoke`.12331234            Second, debug1235            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_conversation`1236            because this test is the "basic case" without double messages.12371238            If that test passes those but not this one, you should verify that:12391240            1. Your model API can handle double messages, or the integration should1241                merge messages before sending them to the API.1242            2. The response is a valid `langchain_core.messages.AIMessage`12431244        """1245        messages = [1246            SystemMessage("hello"),1247            SystemMessage("hello"),1248            HumanMessage("hello"),1249            HumanMessage("hello"),1250            AIMessage("hello"),1251            AIMessage("hello"),1252            HumanMessage("how are you"),1253        ]12541255        result = model.invoke(messages)1256        assert result is not None1257        assert isinstance(result, AIMessage)1258        assert isinstance(result.text, str)1259        assert len(result.content) > 012601261    def test_usage_metadata(self, model: BaseChatModel) -> None:1262        """Test to verify that the model returns correct usage metadata.12631264        This test is optional and should be skipped if the model does not return1265        usage metadata (see configuration below).12661267        !!! warning "Behavior changed in `langchain-tests` 0.3.17"12681269            Additionally check for the presence of `model_name` in the response1270            metadata, which is needed for usage tracking in callback handlers.12711272        ??? note "Configuration"12731274            By default, this test is run.12751276            To disable this feature, set `returns_usage_metadata` to `False` in your1277            test class:12781279            ```python1280            class TestMyChatModelIntegration(ChatModelIntegrationTests):1281                @property1282                def returns_usage_metadata(self) -> bool:1283                    return False1284            ```12851286            This test can also check the format of specific kinds of usage metadata1287            based on the `supported_usage_metadata_details` property.12881289            This property should be configured as follows with the types of tokens that1290            the model supports tracking:12911292            ```python1293            class TestMyChatModelIntegration(ChatModelIntegrationTests):1294                @property1295                def supported_usage_metadata_details(self) -> dict:1296                    return {1297                        "invoke": [1298                            "audio_input",1299                            "audio_output",1300                            "reasoning_output",1301                            "cache_read_input",1302                            "cache_creation_input",1303                        ],1304                        "stream": [1305                            "audio_input",1306                            "audio_output",1307                            "reasoning_output",1308                            "cache_read_input",1309                            "cache_creation_input",1310                        ],1311                    }1312            ```13131314        ??? question "Troubleshooting"13151316            If this test fails, first verify that your model returns1317            `langchain_core.messages.ai.UsageMetadata` dicts1318            attached to the returned `AIMessage` object in `_generate`:13191320            ```python1321            return ChatResult(1322                generations=[1323                    ChatGeneration(1324                        message=AIMessage(1325                            content="Output text",1326                            usage_metadata={1327                                "input_tokens": 350,1328                                "output_tokens": 240,1329                                "total_tokens": 590,1330                                "input_token_details": {1331                                    "audio": 10,1332                                    "cache_creation": 200,1333                                    "cache_read": 100,1334                                },1335                                "output_token_details": {1336                                    "audio": 10,1337                                    "reasoning": 200,1338                                },1339                            },1340                        )1341                    )1342                ]1343            )1344            ```13451346            Check also that the response includes a `model_name` key in its1347            `usage_metadata`.1348        """1349        if not self.returns_usage_metadata:1350            pytest.skip("Not implemented.")13511352        result = model.invoke("Hello")1353        assert result is not None1354        assert isinstance(result, AIMessage)13551356        assert result.usage_metadata is not None1357        assert isinstance(result.usage_metadata["input_tokens"], int)1358        assert isinstance(result.usage_metadata["output_tokens"], int)1359        assert isinstance(result.usage_metadata["total_tokens"], int)13601361        # Check model_name is in response_metadata1362        # Needed for langchain_core.callbacks.usage1363        model_name = result.response_metadata.get("model_name")1364        assert isinstance(model_name, str)1365        assert model_name, "model_name is empty"13661367        # `input_tokens` is the total, possibly including other unclassified or1368        # system-level tokens.1369        if "audio_input" in self.supported_usage_metadata_details["invoke"]:1370            # Checks if the specific chat model integration being tested has declared1371            # that it supports reporting token counts specifically for `audio_input`1372            msg = self.invoke_with_audio_input()  # To be implemented in test subclass1373            assert (usage_metadata := msg.usage_metadata) is not None1374            assert (1375                input_token_details := usage_metadata.get("input_token_details")1376            ) is not None1377            assert isinstance(input_token_details.get("audio"), int)1378            # Asserts that total input tokens are at least the sum of the token counts1379            assert usage_metadata.get("input_tokens", 0) >= sum(1380                v for v in input_token_details.values() if isinstance(v, int)1381            )1382        if "audio_output" in self.supported_usage_metadata_details["invoke"]:1383            msg = self.invoke_with_audio_output()1384            assert (usage_metadata := msg.usage_metadata) is not None1385            assert (1386                output_token_details := usage_metadata.get("output_token_details")1387            ) is not None1388            assert isinstance(output_token_details.get("audio"), int)1389            # Asserts that total output tokens are at least the sum of the token counts1390            assert usage_metadata.get("output_tokens", 0) >= sum(1391                v for v in output_token_details.values() if isinstance(v, int)1392            )1393        if "reasoning_output" in self.supported_usage_metadata_details["invoke"]:1394            msg = self.invoke_with_reasoning_output()1395            assert (usage_metadata := msg.usage_metadata) is not None1396            assert (1397                output_token_details := usage_metadata.get("output_token_details")1398            ) is not None1399            assert isinstance(output_token_details.get("reasoning"), int)1400            # Asserts that total output tokens are at least the sum of the token counts1401            assert usage_metadata.get("output_tokens", 0) >= sum(1402                v for v in output_token_details.values() if isinstance(v, int)1403            )1404        if "cache_read_input" in self.supported_usage_metadata_details["invoke"]:1405            msg = self.invoke_with_cache_read_input()1406            usage_metadata = msg.usage_metadata1407            assert usage_metadata is not None1408            input_token_details = usage_metadata.get("input_token_details")1409            assert input_token_details is not None1410            cache_read_tokens = input_token_details.get("cache_read")1411            assert isinstance(cache_read_tokens, int)1412            assert cache_read_tokens >= 01413            # Asserts that total input tokens are at least the sum of the token counts1414            total_detailed_tokens = sum(1415                v for v in input_token_details.values() if isinstance(v, int) and v >= 01416            )1417            input_tokens = usage_metadata.get("input_tokens", 0)1418            assert isinstance(input_tokens, int)1419            assert input_tokens >= total_detailed_tokens1420        if "cache_creation_input" in self.supported_usage_metadata_details["invoke"]:1421            msg = self.invoke_with_cache_creation_input()1422            usage_metadata = msg.usage_metadata1423            assert usage_metadata is not None1424            input_token_details = usage_metadata.get("input_token_details")1425            assert input_token_details is not None1426            cache_creation_tokens = input_token_details.get("cache_creation")1427            assert isinstance(cache_creation_tokens, int)1428            assert cache_creation_tokens >= 01429            # Asserts that total input tokens are at least the sum of the token counts1430            total_detailed_tokens = sum(1431                v for v in input_token_details.values() if isinstance(v, int) and v >= 01432            )1433            input_tokens = usage_metadata.get("input_tokens", 0)1434            assert isinstance(input_tokens, int)1435            assert input_tokens >= total_detailed_tokens14361437    def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:1438        """Test usage metadata in streaming mode.14391440        Test to verify that the model returns correct usage metadata in streaming mode.14411442        !!! warning "Behavior changed in `langchain-tests` 0.3.17"14431444            Additionally check for the presence of `model_name` in the response1445            metadata, which is needed for usage tracking in callback handlers.14461447        ??? note "Configuration"14481449            By default, this test is run.1450            To disable this feature, set `returns_usage_metadata` to `False` in your1451            test class:14521453            ```python1454            class TestMyChatModelIntegration(ChatModelIntegrationTests):1455                @property1456                def returns_usage_metadata(self) -> bool:1457                    return False1458            ```14591460            This test can also check the format of specific kinds of usage metadata1461            based on the `supported_usage_metadata_details` property.14621463            This property should be configured as follows with the types of tokens that1464            the model supports tracking:14651466            ```python1467            class TestMyChatModelIntegration(ChatModelIntegrationTests):1468                @property1469                def supported_usage_metadata_details(self) -> dict:1470                    return {1471                        "invoke": [1472                            "audio_input",1473                            "audio_output",1474                            "reasoning_output",1475                            "cache_read_input",1476                            "cache_creation_input",1477                        ],1478                        "stream": [1479                            "audio_input",1480                            "audio_output",1481                            "reasoning_output",1482                            "cache_read_input",1483                            "cache_creation_input",1484                        ],1485                    }1486            ```14871488        ??? question "Troubleshooting"14891490            If this test fails, first verify that your model yields1491            `langchain_core.messages.ai.UsageMetadata` dicts1492            attached to the returned `AIMessage` object in `_stream`1493            that sum up to the total usage metadata.14941495            Note that `input_tokens` should only be included on one of the chunks1496            (typically the first or the last chunk), and the rest should have `0` or1497            `None` to avoid counting input tokens multiple times.14981499            `output_tokens` typically count the number of tokens in each chunk, not1500            the sum. This test will pass as long as the sum of `output_tokens` across1501            all chunks is not `0`.15021503            ```python1504            yield ChatResult(1505                generations=[1506                    ChatGeneration(1507                        message=AIMessage(1508                            content="Output text",1509                            usage_metadata={1510                                "input_tokens": (1511                                    num_input_tokens if is_first_chunk else 01512                                ),1513                                "output_tokens": 11,1514                                "total_tokens": (1515                                    11 + num_input_tokens if is_first_chunk else 111516                                ),1517                                "input_token_details": {1518                                    "audio": 10,1519                                    "cache_creation": 200,1520                                    "cache_read": 100,1521                                },1522                                "output_token_details": {1523                                    "audio": 10,1524                                    "reasoning": 200,1525                                },1526                            },1527                        )1528                    )1529                ]1530            )1531            ```15321533            Check also that the aggregated response includes a `model_name` key1534            in its `usage_metadata`.15351536        """1537        if not self.returns_usage_metadata:1538            pytest.skip("Not implemented.")15391540        full: AIMessageChunk | None = None1541        for chunk in model.stream("Write me 2 haikus. Only include the haikus."):1542            assert isinstance(chunk, AIMessageChunk)1543            # only one chunk is allowed to set usage_metadata.input_tokens1544            # if multiple do, it's likely a bug that will result in overcounting1545            # input tokens (since the total number of input tokens applies to the full1546            # generation, not individual chunks)1547            if full and full.usage_metadata and full.usage_metadata["input_tokens"]:1548                assert (1549                    not chunk.usage_metadata or not chunk.usage_metadata["input_tokens"]1550                ), (1551                    "Only one chunk should set input_tokens,"1552                    " the rest should be 0 or None"1553                )1554            # only one chunk is allowed to set usage_metadata.model_name1555            # if multiple do, they'll be concatenated incorrectly1556            if full and full.usage_metadata and full.usage_metadata.get("model_name"):1557                assert not chunk.usage_metadata or not chunk.usage_metadata.get(1558                    "model_name"1559                ), "Only one chunk should set model_name, the rest should be None"1560            full = chunk if full is None else full + chunk15611562        assert isinstance(full, AIMessageChunk)1563        assert full.usage_metadata is not None1564        assert isinstance(full.usage_metadata["input_tokens"], int)1565        assert isinstance(full.usage_metadata["output_tokens"], int)1566        assert isinstance(full.usage_metadata["total_tokens"], int)15671568        # Check model_name is in response_metadata1569        # Needed for langchain_core.callbacks.usage1570        model_name = full.response_metadata.get("model_name")1571        assert isinstance(model_name, str)1572        assert model_name, "model_name is empty"15731574        if "audio_input" in self.supported_usage_metadata_details["stream"]:1575            msg = self.invoke_with_audio_input(stream=True)1576            assert msg.usage_metadata is not None1577            assert isinstance(1578                msg.usage_metadata.get("input_token_details", {}).get("audio"), int1579            )1580        if "audio_output" in self.supported_usage_metadata_details["stream"]:1581            msg = self.invoke_with_audio_output(stream=True)1582            assert msg.usage_metadata is not None1583            assert isinstance(1584                msg.usage_metadata.get("output_token_details", {}).get("audio"), int1585            )1586        if "reasoning_output" in self.supported_usage_metadata_details["stream"]:1587            msg = self.invoke_with_reasoning_output(stream=True)1588            assert msg.usage_metadata is not None1589            assert isinstance(1590                msg.usage_metadata.get("output_token_details", {}).get("reasoning"), int1591            )1592        if "cache_read_input" in self.supported_usage_metadata_details["stream"]:1593            msg = self.invoke_with_cache_read_input(stream=True)1594            assert msg.usage_metadata is not None1595            assert isinstance(1596                msg.usage_metadata.get("input_token_details", {}).get("cache_read"), int1597            )1598        if "cache_creation_input" in self.supported_usage_metadata_details["stream"]:1599            msg = self.invoke_with_cache_creation_input(stream=True)1600            assert msg.usage_metadata is not None1601            assert isinstance(1602                msg.usage_metadata.get("input_token_details", {}).get("cache_creation"),1603                int,1604            )16051606    def test_stop_sequence(self, model: BaseChatModel) -> None:1607        """Test that model does not fail when invoked with the `stop` parameter.16081609        The `stop` parameter is a standard parameter for stopping generation at a1610        certain token.16111612        [More on standard parameters](https://python.langchain.com/docs/concepts/chat_models/#standard-parameters).16131614        This should pass for all integrations.16151616        ??? question "Troubleshooting"16171618            If this test fails, check that the function signature for `_generate`1619            (as well as `_stream` and async variants) accepts the `stop` parameter:16201621            ```python1622            def _generate(1623                self,1624                messages: List[BaseMessage],1625                stop: list[str] | None = None,1626                run_manager: CallbackManagerForLLMRun | None = None,1627                **kwargs: Any,1628            ) -> ChatResult:16291630            ```1631        """1632        result = model.invoke("hi", stop=["you"])1633        assert isinstance(result, AIMessage)16341635        custom_model = self.chat_model_class(1636            **{1637                **self.chat_model_params,1638                "stop": ["you"],1639            }1640        )1641        result = custom_model.invoke("hi")1642        assert isinstance(result, AIMessage)16431644    @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)1645    def test_tool_calling(self, model: BaseChatModel) -> None:1646        """Test that the model generates tool calls.16471648        This test is skipped if the `has_tool_calling` property on the test class is1649        set to `False`.16501651        This test is optional and should be skipped if the model does not support1652        tool calling (see configuration below).16531654        ??? note "Configuration"16551656            To disable tool calling tests, set `has_tool_calling` to `False` in your1657            test class:16581659            ```python1660            class TestMyChatModelIntegration(ChatModelIntegrationTests):1661                @property1662                def has_tool_calling(self) -> bool:1663                    return False1664            ```16651666        ??? question "Troubleshooting"16671668            If this test fails, check that `bind_tools` is implemented to correctly1669            translate LangChain tool objects into the appropriate schema for your1670            chat model.16711672            This test may fail if the chat model does not support a `tool_choice`1673            parameter. This parameter can be used to force a tool call. If1674            `tool_choice` is not supported and the model consistently fails this1675            test, you can `xfail` the test:16761677            ```python1678            @pytest.mark.xfail(reason=("Does not support tool_choice."))1679            def test_tool_calling(self, model: BaseChatModel) -> None:1680                super().test_tool_calling(model)1681            ```16821683            Otherwise, in the case that only one tool is bound, ensure that1684            `tool_choice` supports the string `'any'` to force calling that tool.16851686            If `tool_call_streaming = true` is set in the model's profile1687            augmentations, individual chunks are also validated to contain1688            `tool_call_chunk` blocks in `content_blocks`.16891690        """1691        if not self.has_tool_calling:1692            pytest.skip("Test requires tool calling.")16931694        tool_choice_value = None if not self.has_tool_choice else "any"1695        model_with_tools = model.bind_tools(1696            [magic_function], tool_choice=tool_choice_value1697        )16981699        # Test invoke1700        query = "What is the value of magic_function(3)? Use the tool."1701        result = model_with_tools.invoke(query)1702        _validate_tool_call_message(result)17031704        tool_call_streaming = (1705            model.profile.get("tool_call_streaming", False) if model.profile else False1706        )17071708        # Test stream1709        full: BaseMessage | None = None1710        found_tool_call_chunk = False1711        for chunk in model_with_tools.stream(query):1712            if tool_call_streaming and isinstance(chunk, AIMessageChunk):1713                found_tool_call_chunk |= _validate_tool_call_chunk(chunk)1714            full = chunk if full is None else (cast("AIMessageChunk", full) + chunk)1715        assert isinstance(full, AIMessage)1716        _validate_tool_call_message(full)17171718        if tool_call_streaming:1719            assert found_tool_call_chunk, (1720                "Expected to find 'tool_call_chunk' blocks in content_blocks of at "1721                "least one chunk during streaming, but none were found. If this "1722                "model does not support streaming tool calls, set "1723                "tool_call_streaming=false in the model's profile augmentations."1724            )17251726    async def test_tool_calling_async(self, model: BaseChatModel) -> None:1727        """Test that the model generates tool calls.17281729        This test is skipped if the `has_tool_calling` property on the test class is1730        set to `False`.17311732        This test is optional and should be skipped if the model does not support1733        tool calling (see configuration below).17341735        ??? note "Configuration"17361737            To disable tool calling tests, set `has_tool_calling` to `False` in your1738            test class:17391740            ```python1741            class TestMyChatModelIntegration(ChatModelIntegrationTests):1742                @property1743                def has_tool_calling(self) -> bool:1744                    return False1745            ```17461747        ??? question "Troubleshooting"17481749            If this test fails, check that `bind_tools` is implemented to correctly1750            translate LangChain tool objects into the appropriate schema for your1751            chat model.17521753            This test may fail if the chat model does not support a `tool_choice`1754            parameter. This parameter can be used to force a tool call. If1755            `tool_choice` is not supported and the model consistently fails this1756            test, you can `xfail` the test:17571758            ```python1759            @pytest.mark.xfail(reason=("Does not support tool_choice."))1760            async def test_tool_calling_async(self, model: BaseChatModel) -> None:1761                await super().test_tool_calling_async(model)1762            ```17631764            Otherwise, in the case that only one tool is bound, ensure that1765            `tool_choice` supports the string `'any'` to force calling that tool.17661767            See `test_tool_calling` for `tool_call_streaming` profile configuration.17681769        """1770        if not self.has_tool_calling:1771            pytest.skip("Test requires tool calling.")17721773        tool_choice_value = None if not self.has_tool_choice else "any"1774        model_with_tools = model.bind_tools(1775            [magic_function], tool_choice=tool_choice_value1776        )17771778        # Test ainvoke1779        query = "What is the value of magic_function(3)? Use the tool."1780        result = await model_with_tools.ainvoke(query)1781        _validate_tool_call_message(result)17821783        tool_call_streaming = (1784            model.profile.get("tool_call_streaming", False) if model.profile else False1785        )17861787        # Test astream1788        full: BaseMessage | None = None1789        found_tool_call_chunk = False1790        async for chunk in model_with_tools.astream(query):1791            if tool_call_streaming and isinstance(chunk, AIMessageChunk):1792                found_tool_call_chunk |= _validate_tool_call_chunk(chunk)1793            full = chunk if full is None else (cast("AIMessageChunk", full) + chunk)1794        assert isinstance(full, AIMessage)1795        _validate_tool_call_message(full)17961797        if tool_call_streaming:1798            assert found_tool_call_chunk, (1799                "Expected to find 'tool_call_chunk' blocks in content_blocks of at "1800                "least one chunk during streaming, but none were found. If this "1801                "model does not support streaming tool calls, set "1802                "tool_call_streaming=false in the model's profile augmentations."1803            )18041805    def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:1806        """Test bind runnables as tools.18071808        Test that the model generates tool calls for tools that are derived from1809        LangChain runnables. This test is skipped if the `has_tool_calling` property1810        on the test class is set to `False`.18111812        This test is optional and should be skipped if the model does not support1813        tool calling (see configuration below).18141815        ??? note "Configuration"18161817            To disable tool calling tests, set `has_tool_calling` to `False` in your1818            test class:18191820            ```python1821            class TestMyChatModelIntegration(ChatModelIntegrationTests):1822                @property1823                def has_tool_calling(self) -> bool:1824                    return False1825            ```18261827        ??? question "Troubleshooting"18281829            If this test fails, check that `bind_tools` is implemented to correctly1830            translate LangChain tool objects into the appropriate schema for your1831            chat model.18321833            This test may fail if the chat model does not support a `tool_choice`1834            parameter. This parameter can be used to force a tool call. If1835            `tool_choice` is not supported, set `has_tool_choice` to `False` in1836            your test class:18371838            ```python1839            @property1840            def has_tool_choice(self) -> bool:1841                return False1842            ```18431844        """1845        if not self.has_tool_calling:1846            pytest.skip("Test requires tool calling.")18471848        prompt = ChatPromptTemplate.from_messages(1849            [("human", "Hello. Please respond in the style of {answer_style}.")]1850        )1851        llm = GenericFakeChatModel(messages=iter(["hello matey"]))1852        chain = prompt | llm | StrOutputParser()1853        tool_ = chain.as_tool(1854            name="greeting_generator",1855            description="Generate a greeting in a particular style of speaking.",1856        )1857        if self.has_tool_choice:1858            tool_choice: str | None = "any"1859        else:1860            tool_choice = None1861        model_with_tools = model.bind_tools([tool_], tool_choice=tool_choice)1862        query = "Using the tool, generate a Pirate greeting."1863        result = model_with_tools.invoke(query)1864        assert isinstance(result, AIMessage)1865        assert result.tool_calls1866        tool_call = result.tool_calls[0]1867        assert tool_call["args"].get("answer_style")1868        assert tool_call.get("type") == "tool_call"18691870    def test_tool_message_histories_string_content(1871        self, model: BaseChatModel, my_adder_tool: BaseTool1872    ) -> None:1873        """Test that message histories are compatible with string tool contents.18741875        For instance with OpenAI format contents.1876        If a model passes this test, it should be compatible1877        with messages generated from providers following OpenAI format.18781879        This test should be skipped if the model does not support tool calling1880        (see configuration below).18811882        ??? note "Configuration"18831884            To disable tool calling tests, set `has_tool_calling` to `False` in your1885            test class:18861887            ```python1888            class TestMyChatModelIntegration(ChatModelIntegrationTests):1889                @property1890                def has_tool_calling(self) -> bool:1891                    return False1892            ```18931894        ??? question "Troubleshooting"18951896            If this test fails, check that:18971898            1. The model can correctly handle message histories that include1899                `AIMessage` objects with `""` content.1900            2. The `tool_calls` attribute on `AIMessage` objects is correctly1901                handled and passed to the model in an appropriate format.1902            3. The model can correctly handle `ToolMessage` objects with string1903                content and arbitrary string values for `tool_call_id`.19041905            You can `xfail` the test if tool calling is implemented but this format1906            is not supported.19071908            ```python1909            @pytest.mark.xfail(reason=("Not implemented."))1910            def test_tool_message_histories_string_content(self, *args: Any) -> None:1911                super().test_tool_message_histories_string_content(*args)1912            ```1913        """1914        if not self.has_tool_calling:1915            pytest.skip("Test requires tool calling.")19161917        model_with_tools = model.bind_tools([my_adder_tool])1918        function_name = "my_adder_tool"1919        function_args = {"a": 1, "b": 2}19201921        messages_string_content = [1922            HumanMessage("What is 1 + 2"),1923            # string content (e.g. OpenAI)1924            AIMessage(1925                "",1926                tool_calls=[1927                    {1928                        "name": function_name,1929                        "args": function_args,1930                        "id": "abc123",1931                        "type": "tool_call",1932                    },1933                ],1934            ),1935            ToolMessage(1936                json.dumps({"result": 3}),1937                name=function_name,1938                tool_call_id="abc123",1939            ),1940        ]1941        result_string_content = model_with_tools.invoke(messages_string_content)1942        assert isinstance(result_string_content, AIMessage)19431944    def test_tool_message_histories_list_content(1945        self,1946        model: BaseChatModel,1947        my_adder_tool: BaseTool,1948    ) -> None:1949        """Test that message histories are compatible with list tool contents.19501951        For instance with Anthropic format contents.19521953        These message histories will include `AIMessage` objects with "tool use" and1954        content blocks, e.g.,19551956        ```python1957        [1958            {"type": "text", "text": "Hmm let me think about that"},1959            {1960                "type": "tool_use",1961                "input": {"fav_color": "green"},1962                "id": "foo",1963                "name": "color_picker",1964            },1965        ]1966        ```19671968        This test should be skipped if the model does not support tool calling1969        (see configuration below).19701971        ??? note "Configuration"19721973            To disable tool calling tests, set `has_tool_calling` to `False` in your1974            test class:19751976            ```python1977            class TestMyChatModelIntegration(ChatModelIntegrationTests):1978                @property1979                def has_tool_calling(self) -> bool:1980                    return False1981            ```19821983        ??? question "Troubleshooting"19841985            If this test fails, check that:19861987            1. The model can correctly handle message histories that include1988                `AIMessage` objects with list content.1989            2. The `tool_calls` attribute on `AIMessage` objects is correctly1990                handled and passed to the model in an appropriate format.1991            3. The model can correctly handle ToolMessage objects with string content1992                and arbitrary string values for `tool_call_id`.19931994            You can `xfail` the test if tool calling is implemented but this format1995            is not supported.19961997            ```python1998            @pytest.mark.xfail(reason=("Not implemented."))1999            def test_tool_message_histories_list_content(self, *args: Any) -> None:2000                super().test_tool_message_histories_list_content(*args)
Findings

✓ No findings reported for this file.
Findings

Get this view in your editor