libs/standard-tests/langchain_tests/integration_tests/chat_models.py PYTHON 3,593 lines View on github.com → Search inside
File is large — showing lines 1–2,000 of 3,593.
1"""Integration tests for chat models."""23from __future__ import annotations45import base646import json7import os8import warnings9from typing import TYPE_CHECKING, Annotated, Any, Literal10from unittest.mock import MagicMock1112import httpx13import pytest14from langchain_core.callbacks import BaseCallbackHandler15from langchain_core.language_models import BaseChatModel, GenericFakeChatModel16from langchain_core.language_models.chat_model_stream import (17    AsyncChatModelStream,18    ChatModelStream,19)20from langchain_core.messages import (21    AIMessage,22    AIMessageChunk,23    BaseMessage,24    HumanMessage,25    SystemMessage,26    ToolMessage,27)28from langchain_core.output_parsers import StrOutputParser29from langchain_core.prompts import ChatPromptTemplate30from langchain_core.tools import BaseTool, tool31from langchain_core.utils.function_calling import (32    convert_to_json_schema,33    tool_example_to_messages,34)35from pydantic import BaseModel, Field36from pydantic.v1 import BaseModel as BaseModelV137from pydantic.v1 import Field as FieldV138from typing_extensions import TypedDict, override3940from langchain_tests.unit_tests.chat_models import ChatModelTests41from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION42from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream4344if TYPE_CHECKING:45    from pytest_benchmark.fixture import (46        BenchmarkFixture,47    )48    from vcr.cassette import Cassette495051def _get_joke_class(  # noqa: RET50352    schema_type: Literal["pydantic", "typeddict", "json_schema"],53) -> Any:54    class Joke(BaseModel):55        """Joke to tell user."""5657        setup: str = Field(description="question to set up a joke")58        punchline: str = Field(description="answer to resolve the joke")5960    def validate_joke(result: Any) -> bool:61        return isinstance(result, Joke)6263    class JokeDict(TypedDict):64        """Joke to tell user."""6566        setup: Annotated[str, ..., "question to set up a joke"]67        punchline: Annotated[str, ..., "answer to resolve the joke"]6869    def validate_joke_dict(result: Any) -> bool:70        return all(key in {"setup", "punchline"} for key in result)7172    if schema_type == "pydantic":73        return Joke, validate_joke7475    if schema_type == "typeddict":76        return JokeDict, validate_joke_dict7778    if schema_type == "json_schema":79        return Joke.model_json_schema(), validate_joke_dict808182class _TestCallbackHandler(BaseCallbackHandler):83    options: list[dict[str, Any] | None]8485    def __init__(self) -> None:86        super().__init__()87        self.options = []8889    @override90    def on_chat_model_start(91        self,92        serialized: Any,93        messages: Any,94        *,95        options: dict[str, Any] | None = None,96        **kwargs: Any,97    ) -> None:98        self.options.append(options)99100101class _MagicFunctionSchema(BaseModel):102    input: int = Field(..., gt=-1000, lt=1000)103104105@tool(args_schema=_MagicFunctionSchema)106def magic_function(_input: int) -> int:107    """Apply a magic function to an input."""108    return _input + 2109110111@tool112def magic_function_no_args() -> int:113    """Calculate a magic function."""114    return 5115116117def _validate_tool_call_message(message: BaseMessage) -> None:118    assert isinstance(message, AIMessage)119    assert len(message.tool_calls) == 1120121    tool_call = message.tool_calls[0]122    assert tool_call["name"] == "magic_function"123    assert tool_call["args"] == {"input": 3}124    assert tool_call["id"] is not None125    assert tool_call.get("type") == "tool_call"126127    content_tool_calls = [128        block for block in message.content_blocks if block["type"] == "tool_call"129    ]130    assert len(content_tool_calls) == 1131    content_tool_call = content_tool_calls[0]132    assert content_tool_call["name"] == "magic_function"133    assert content_tool_call["args"] == {"input": 3}134    assert content_tool_call["id"] is not None135136137def _validate_tool_call_chunk(chunk: AIMessageChunk) -> bool:138    """Check whether a streaming chunk contains valid `tool_call_chunk` blocks.139140    Returns:141        `True` if at least one `tool_call_chunk` block was found.142    """143    found = False144    for block in chunk.content_blocks:145        if block.get("type") == "tool_call_chunk":146            found = True147            assert "name" in block, "tool_call_chunk block missing 'name' field"148            assert "args" in block, "tool_call_chunk block missing 'args' field"149            assert "id" in block, "tool_call_chunk block missing 'id' field"150    return found151152153def _validate_tool_call_message_no_args(message: BaseMessage) -> None:154    assert isinstance(message, AIMessage)155    assert len(message.tool_calls) == 1156157    tool_call = message.tool_calls[0]158    assert tool_call["name"] == "magic_function_no_args"159    assert tool_call["args"] == {}160    assert tool_call["id"] is not None161    assert tool_call.get("type") == "tool_call"162163164def _get_base64_from_url(url: str) -> str:165    user_agent = os.environ.get("LANGCHAIN_TESTS_USER_AGENT")166    if not user_agent:167        warning_message = (168            "LANGCHAIN_TESTS_USER_AGENT environment variable not set. "169            "langchain-tests pulls (CC0 License) audio data from wikimedia.org. "170            "Consider setting a user agent to identify your requests. See "171            "https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy"172        )173        warnings.warn(warning_message, stacklevel=2)174    headers = {"User-Agent": user_agent} if user_agent else {}175    httpx_response = httpx.get(url, headers=headers, timeout=10.0).content176    return base64.b64encode(httpx_response).decode("utf-8")177178179@tool180def unicode_customer(customer_name: str, description: str) -> str:181    """Tool for creating a customer with Unicode name.182183    Args:184        customer_name: The customer's name in their native language.185        description: Description of the customer.186187    Returns:188        A confirmation message about the customer creation.189190    """191    return f"Created customer: {customer_name} - {description}"192193194class ChatModelIntegrationTests(ChatModelTests):195    '''Base class for chat model integration tests.196197    Test subclasses must implement the `chat_model_class` and198    `chat_model_params` properties to specify what model to test and its199    initialization parameters.200201    ```python202    from typing import Type203204    from langchain_tests.integration_tests import ChatModelIntegrationTests205    from my_package.chat_models import MyChatModel206207208    class TestMyChatModelIntegration(ChatModelIntegrationTests):209        @property210        def chat_model_class(self) -> Type[MyChatModel]:211            # Return the chat model class to test here212            return MyChatModel213214        @property215        def chat_model_params(self) -> dict:216            # Return initialization parameters for the model.217            return {"model": "model-001", "temperature": 0}218    ```219220    !!! note221        API references for individual test methods include troubleshooting tips.222223224    Test subclasses **must** implement the following two properties:225226    `chat_model_class`: The chat model class to test, e.g., `ChatParrotLink`.227228    ```python229    @property230    def chat_model_class(self) -> Type[ChatParrotLink]:231        return ChatParrotLink232    ```233234    `chat_model_params`: Initialization parameters for the chat model.235236    ```python237    @property238    def chat_model_params(self) -> dict:239        return {"model": "bird-brain-001", "temperature": 0}240    ```241242    In addition, test subclasses can control what features are tested (such as tool243    calling or multi-modality) by selectively overriding the following properties.244245    Expand to see details:246247    ???+ info "`has_tool_calling`"248249        Boolean property indicating whether the chat model supports tool calling.250251        By default, this is determined by whether the chat model's `bind_tools` method252        is overridden. It typically does not need to be overridden on the test class.253254        ```python255        @property256        def has_tool_calling(self) -> bool:257            return True258        ```259260    ??? info "`has_tool_choice`"261262        Boolean property indicating whether the chat model supports forcing tool263        calling via a `tool_choice` parameter.264265        By default, this is determined by whether the parameter is included in the266        signature for the corresponding `bind_tools` method.267268        If `True`, the minimum requirement for this feature is that269        `tool_choice='any'` will force a tool call, and `tool_choice=<tool name>`270        will force a call to a specific tool.271272        ```python273        @property274        def has_tool_choice(self) -> bool:275            return False276        ```277278    ??? info "`has_structured_output`"279280        Boolean property indicating whether the chat model supports structured281        output.282283        By default, this is determined by whether the chat model's284        `with_structured_output` method is overridden. If the base implementation is285        intended to be used, this method should be overridden.286287        See docs for [Structured output](https://docs.langchain.com/oss/python/langchain/structured-output).288289        ```python290        @property291        def has_structured_output(self) -> bool:292            return True293        ```294295    ??? info "`structured_output_kwargs`"296297        Dict property specifying additional kwargs to pass to298        `with_structured_output()` when running structured output tests.299300        Override this to customize how your model generates structured output.301302        The most common use case is specifying the `method` parameter:303304        - `'function_calling'`: Uses tool/function calling to enforce the schema.305        - `'json_mode'`: Uses the model's JSON mode.306        - `'json_schema'`: Uses native JSON schema support (e.g., OpenAI's structured307            outputs).308309        ```python310        @property311        def structured_output_kwargs(self) -> dict:312            return {"method": "json_schema"}313        ```314315    ??? info "`supports_json_mode`"316317        Boolean property indicating whether the chat model supports318        `method='json_mode'` in `with_structured_output`.319320        Defaults to `False`.321322        JSON mode constrains the model to output valid JSON without enforcing323        a specific schema (unlike `'function_calling'` or `'json_schema'` methods).324325        When using JSON mode, you must prompt the model to output JSON in your326        message.327328        !!! example329330            ```python331            structured_llm = llm.with_structured_output(MySchema, method="json_mode")332            structured_llm.invoke("... Return the result as JSON.")333            ```334335        See docs for [Structured output](https://docs.langchain.com/oss/python/langchain/structured-output).336337        ```python338        @property339        def supports_json_mode(self) -> bool:340            return True341        ```342343    ??? info "`supports_image_inputs`"344345        Boolean property indicating whether the chat model supports image inputs.346347        Defaults to `False`.348349        If set to `True`, the chat model will be tested by inputting an350        `ImageContentBlock` with the shape:351352        ```python353        {354            "type": "image",355            "base64": "<base64 image data>",356            "mime_type": "image/jpeg",  # or appropriate MIME type357        }358        ```359360        In addition to OpenAI-style content blocks:361362        ```python363        {364            "type": "image_url",365            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},366        }367        ```368369        See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).370371        ```python372        @property373        def supports_image_inputs(self) -> bool:374            return True375        ```376377    ??? info "`supports_image_urls`"378379        Boolean property indicating whether the chat model supports image inputs from380        URLs.381382        Defaults to `False`.383384        If set to `True`, the chat model will be tested using content blocks of the385        form386387        ```python388        {389            "type": "image",390            "url": "https://...",391        }392        ```393394        See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).395396        ```python397        @property398        def supports_image_urls(self) -> bool:399            return True400        ```401402    ??? info "`supports_image_tool_message`"403404        Boolean property indicating whether the chat model supports a `ToolMessage`405        that includes image content, e.g. in the OpenAI Chat Completions format.406407        Defaults to `False`.408409        ```python410        ToolMessage(411            content=[412                {413                    "type": "image_url",414                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},415                },416            ],417            tool_call_id="1",418            name="random_image",419        )420        ```421422        ...as well as the LangChain `ImageContentBlock` format:423424        ```python425        ToolMessage(426            content=[427                {428                    "type": "image",429                    "base64": image_data,430                    "mime_type": "image/jpeg",431                },432            ],433            tool_call_id="1",434            name="random_image",435        )436        ```437438        If set to `True`, the chat model will be tested with message sequences that439        include `ToolMessage` objects of this form.440441        ```python442        @property443        def supports_image_tool_message(self) -> bool:444            return True445        ```446447    ??? info "`supports_pdf_inputs`"448449        Boolean property indicating whether the chat model supports PDF inputs.450451        Defaults to `False`.452453        If set to `True`, the chat model will be tested by inputting a454        `FileContentBlock` with the shape:455456        ```python457        {458            "type": "file",459            "base64": "<base64 file data>",460            "mime_type": "application/pdf",461        }462        ```463464        See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).465466        ```python467        @property468        def supports_pdf_inputs(self) -> bool:469            return True470        ```471472    ??? info "`supports_pdf_tool_message`"473474        Boolean property indicating whether the chat model supports a `ToolMessage`475        that includes PDF content using the LangChain `FileContentBlock` format.476477        Defaults to `False`.478479        ```python480        ToolMessage(481            content=[482                {483                    "type": "file",484                    "base64": pdf_data,485                    "mime_type": "application/pdf",486                },487            ],488            tool_call_id="1",489            name="random_pdf",490        )491        ```492493        If set to `True`, the chat model will be tested with message sequences that494        include `ToolMessage` objects of this form.495496        ```python497        @property498        def supports_pdf_tool_message(self) -> bool:499            return True500        ```501502    ??? info "`supports_audio_inputs`"503504        Boolean property indicating whether the chat model supports audio inputs.505506        Defaults to `False`.507508        If set to `True`, the chat model will be tested by inputting an509        `AudioContentBlock` with the shape:510511        ```python512        {513            "type": "audio",514            "base64": "<base64 audio data>",515            "mime_type": "audio/wav",  # or appropriate MIME type516        }517        ```518519        See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).520521        ```python522        @property523        def supports_audio_inputs(self) -> bool:524            return True525        ```526527        !!! warning528            This test downloads audio data from wikimedia.org. You may need to set the529            `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these tests,530            e.g.,531532            ```bash533            export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; coolbot@example.org) generic-library/0.0"534            ```535536            Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).537538    ??? info "`supports_video_inputs`"539540        Boolean property indicating whether the chat model supports image inputs.541542        Defaults to `False`.543544        No current tests are written for this feature.545546    ??? info "`returns_usage_metadata`"547548        Boolean property indicating whether the chat model returns usage metadata549        on invoke and streaming responses.550551        Defaults to `True`.552553        `usage_metadata` is an optional dict attribute on `AIMessage` objects that track554        input and output tokens.555556        [See more](https://reference.langchain.com/python/langchain_core/language_models/#langchain_core.messages.ai.UsageMetadata).557558        ```python559        @property560        def returns_usage_metadata(self) -> bool:561            return False562        ```563564        Models supporting `usage_metadata` should also return the name of the underlying565        model in the `response_metadata` of the `AIMessage`.566567    ??? info "`supports_anthropic_inputs`"568569        Boolean property indicating whether the chat model supports Anthropic-style570        inputs.571572        Defaults to `False`.573574        These inputs might feature "tool use" and "tool result" content blocks, e.g.,575576        ```python577        [578            {"type": "text", "text": "Hmm let me think about that"},579            {580                "type": "tool_use",581                "input": {"fav_color": "green"},582                "id": "foo",583                "name": "color_picker",584            },585        ]586        ```587588        If set to `True`, the chat model will be tested using content blocks of this589        form.590591        ```python592        @property593        def supports_anthropic_inputs(self) -> bool:594            return True595        ```596597    ??? info "`supported_usage_metadata_details`"598599        Property controlling what usage metadata details are emitted in both invoke600        and stream.601602        Defaults to `{"invoke": [], "stream": []}`.603604        `usage_metadata` is an optional dict attribute on `AIMessage` objects that track605        input and output tokens.606607        [See more](https://reference.langchain.com/python/langchain_core/language_models/#langchain_core.messages.ai.UsageMetadata).608609        It includes optional keys `input_token_details` and `output_token_details`610        that can track usage details associated with special types of tokens, such as611        cached, audio, or reasoning.612613        Only needs to be overridden if these details are supplied.614615    ??? info "`enable_vcr_tests`"616617        Property controlling whether to enable select tests that rely on618        [VCR](https://vcrpy.readthedocs.io/en/latest/) caching of HTTP calls, such619        as benchmarking tests.620621        Defaults to `False`.622623        To enable these tests, follow these steps:624625        1. Override the `enable_vcr_tests` property to return `True`:626627            ```python628            @property629            def enable_vcr_tests(self) -> bool:630                return True631            ```632633        2. Configure VCR to exclude sensitive headers and other information from634            cassettes.635636            !!! warning637                VCR will by default record authentication headers and other sensitive638                information in cassettes. Read below for how to configure what639                information is recorded in cassettes.640641            To add configuration to VCR, add a `conftest.py` file to the `tests/`642            directory and implement the `vcr_config` fixture there.643644            `langchain-tests` excludes the headers `'authorization'`,645            `'x-api-key'`, and `'api-key'` from VCR cassettes. To pick up this646            configuration, you will need to add `conftest.py` as shown below. You can647            also exclude additional headers, override the default exclusions, or apply648            other customizations to the VCR configuration. See example below:649650            ```python title="tests/conftest.py"651            import pytest652            from langchain_tests.conftest import base_vcr_config653654            _EXTRA_HEADERS = [655                # Specify additional headers to redact656                ("user-agent", "PLACEHOLDER"),657            ]658659660            def remove_response_headers(response: dict) -> dict:661                # If desired, remove or modify headers in the response.662                response["headers"] = {}663                return response664665666            @pytest.fixture(scope="session")667            def vcr_config() -> dict:668                """Extend the default configuration from langchain_tests."""669                config = base_vcr_config()670                config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)671                config["before_record_response"] = remove_response_headers672673                return config674            ```675676            ??? note "Compressing cassettes"677678                `langchain-tests` includes a custom VCR serializer that compresses679                cassettes using gzip. To use it, register the `yaml.gz` serializer680                to your VCR fixture and enable this serializer in the config. See681                example below:682683                ```python title="tests/conftest.py"684                import pytest685                from langchain_tests.conftest import (686                    CustomPersister,687                    CustomSerializer,688                )689                from langchain_tests.conftest import base_vcr_config690                from vcr import VCR691692                _EXTRA_HEADERS = [693                    # Specify additional headers to redact694                    ("user-agent", "PLACEHOLDER"),695                ]696697698                def remove_response_headers(response: dict) -> dict:699                    # If desired, remove or modify headers in the response.700                    response["headers"] = {}701                    return response702703704                @pytest.fixture(scope="session")705                def vcr_config() -> dict:706                    """Extend the default configuration from langchain_tests."""707                    config = base_vcr_config()708                    config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)709                    config["before_record_response"] = remove_response_headers710                    # New: enable serializer and set file extension711                    config["serializer"] = "yaml.gz"712                    config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")713714                    return config715716717                def pytest_recording_configure(config: dict, vcr: VCR) -> None:718                    vcr.register_persister(CustomPersister())719                    vcr.register_serializer("yaml.gz", CustomSerializer())720                ```721722                You can inspect the contents of the compressed cassettes (e.g., to723                ensure no sensitive information is recorded) using724725                ```bash726                gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz727                ```728729                ...or by using the serializer:730731                ```python732                from langchain_tests.conftest import (733                    CustomPersister,734                    CustomSerializer,735                )736737                cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz"738                requests, responses = CustomPersister().load_cassette(739                    path, CustomSerializer()740                )741                ```742743        3. Run tests to generate VCR cassettes.744745            ```bash title="Example"746            uv run python -m pytest tests/integration_tests/test_chat_models.py::TestMyModel::test_stream_time747            ```748749            This will generate a VCR cassette for the test in750            `tests/integration_tests/cassettes/`.751752            !!! warning753                You should inspect the generated cassette to ensure that it does not754                contain sensitive information. If it does, you can modify the755                `vcr_config` fixture to exclude headers or modify the response756                before it is recorded.757758            You can then commit the cassette to your repository. Subsequent test runs759            will use the cassette instead of making HTTP calls.760    '''  # noqa: E501761762    @property763    def standard_chat_model_params(self) -> dict[str, Any]:764        """Standard parameters for chat model."""765        return {}766767    def test_invoke(self, model: BaseChatModel) -> None:768        """Test to verify that `model.invoke(simple_message)` works.769770        This should pass for all integrations.771772        ??? question "Troubleshooting"773774            If this test fails, you should make sure your `_generate` method775            does not raise any exceptions, and that it returns a valid776            `langchain_core.outputs.chat_result.ChatResult` like so:777778            ```python779            return ChatResult(780                generations=[ChatGeneration(message=AIMessage(content="Output text"))]781            )782            ```783784        """785        result = model.invoke("Hello")786        assert result is not None787        assert isinstance(result, AIMessage)788        assert isinstance(result.text, str)789        assert len(result.content) > 0790791    async def test_ainvoke(self, model: BaseChatModel) -> None:792        """Test to verify that `await model.ainvoke(simple_message)` works.793794        This should pass for all integrations. Passing this test does not indicate795        a "natively async" implementation, but rather that the model can be used796        in an async context.797798        ??? question "Troubleshooting"799800            First, debug801            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.802            because `ainvoke` has a default implementation that calls `invoke` in an803            async context.804805            If that test passes but not this one, you should make sure your `_agenerate`806            method does not raise any exceptions, and that it returns a valid807            `langchain_core.outputs.chat_result.ChatResult` like so:808809            ```python810            return ChatResult(811                generations=[ChatGeneration(message=AIMessage(content="Output text"))]812            )813            ```814        """815        result = await model.ainvoke("Hello")816        assert result is not None817        assert isinstance(result, AIMessage)818        assert isinstance(result.text, str)819        assert len(result.content) > 0820821    @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)822    def test_stream(self, model: BaseChatModel) -> None:823        """Test to verify that `model.stream(simple_message)` works.824825        This should pass for all integrations. Passing this test does not indicate826        a "streaming" implementation, but rather that the model can be used in a827        streaming context.828829        ??? question "Troubleshooting"830831            First, debug832            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.833            because `stream` has a default implementation that calls `invoke` and834            yields the result as a single chunk.835836            If that test passes but not this one, you should make sure your `_stream`837            method does not raise any exceptions, and that it yields valid838            `langchain_core.outputs.chat_generation.ChatGenerationChunk`839            objects like so:840841            ```python842            yield ChatGenerationChunk(message=AIMessageChunk(content="chunk text"))843            ```844845            The final chunk must have `chunk_position='last'` to signal stream846            completion. This enables proper parsing of `tool_call_chunks` into847            `tool_calls` on the aggregated message:848849            ```python850            for i, token in enumerate(tokens):851                is_last = i == len(tokens) - 1852                yield ChatGenerationChunk(853                    message=AIMessageChunk(854                        content=token,855                        chunk_position="last" if is_last else None,856                    )857                )858            ```859        """860        chunks: list[AIMessageChunk] = []861        full: AIMessageChunk | None = None862        for chunk in model.stream("Hello"):863            assert chunk is not None864            assert isinstance(chunk, AIMessageChunk)865            assert isinstance(chunk.content, str | list)866            chunks.append(chunk)867            full = chunk if full is None else full + chunk868        assert len(chunks) > 0869        assert isinstance(full, AIMessageChunk)870        assert full.content871        assert full.text872        # Exactly one text block  guards against merge bugs that would produce873        # multiple adjacent text blocks in the aggregated result.874        text_blocks = [b for b in full.content_blocks if b["type"] == "text"]875        assert len(text_blocks) == 1876877        # Verify chunk_position signaling878        last_chunk = chunks[-1]879        assert last_chunk.chunk_position == "last", (880            f"Final chunk must have chunk_position='last', "881            f"got {last_chunk.chunk_position!r}"882        )883884    @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)885    async def test_astream(self, model: BaseChatModel) -> None:886        """Test to verify that `await model.astream(simple_message)` works.887888        This should pass for all integrations. Passing this test does not indicate889        a "natively async" or "streaming" implementation, but rather that the model can890        be used in an async streaming context.891892        ??? question "Troubleshooting"893894            First, debug895            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`.896            and897            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`.898            because `astream` has a default implementation that calls `_stream` in899            an async context if it is implemented, or `ainvoke` and yields the result900            as a single chunk if not.901902            If those tests pass but not this one, you should make sure your `_astream`903            method does not raise any exceptions, and that it yields valid904            `langchain_core.outputs.chat_generation.ChatGenerationChunk`905            objects like so:906907            ```python908            yield ChatGenerationChunk(message=AIMessageChunk(content="chunk text"))909            ```910911            See `test_stream` troubleshooting for `chunk_position` requirements.912        """913        chunks: list[AIMessageChunk] = []914        full: AIMessageChunk | None = None915        async for chunk in model.astream("Hello"):916            assert chunk is not None917            assert isinstance(chunk, AIMessageChunk)918            assert isinstance(chunk.content, str | list)919            chunks.append(chunk)920            full = chunk if full is None else full + chunk921        assert len(chunks) > 0922        assert isinstance(full, AIMessageChunk)923        assert full.content924        assert full.text925        # Exactly one text block  guards against merge bugs that would produce926        # multiple adjacent text blocks in the aggregated result.927        text_blocks = [b for b in full.content_blocks if b["type"] == "text"]928        assert len(text_blocks) == 1929930        # Verify chunk_position signaling931        last_chunk = chunks[-1]932        assert last_chunk.chunk_position == "last", (933            f"Final chunk must have chunk_position='last', "934            f"got {last_chunk.chunk_position!r}"935        )936937    def test_stream_events_v3(self, model: BaseChatModel) -> None:938        """Test that `model.stream_events("Hello", version="v3")` works.939940        Exercises the content-block-centric streaming protocol. Passing this941        test indicates the model participates in `stream_events(version="v3")` either942        natively (via `_stream_chat_model_events`) or through the compat bridge that943        converts `_stream` chunks into protocol events.944945        ??? question "Troubleshooting"946947            First, debug948            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`949             `stream_events(version="v3")` falls back to the same950            `_stream` path via the compat bridge when the model does not951            implement952            `_stream_chat_model_events`. If `test_stream` passes but this does953            not, inspect the raised lifecycle violation: it identifies the954            event index and the rule broken.955        """956        stream = model.stream_events("Hello", version="v3")957        assert isinstance(stream, ChatModelStream)958959        events = list(stream)960        assert len(events) > 0961        assert_valid_event_stream(events)962963        message = stream.output964        assert isinstance(message, AIMessage)965        assert message.content966        assert message.text967        assert any(block["type"] == "text" for block in message.content_blocks)968        # `stream_events(version="v3")` always assembles content as v1 protocol blocks.969        assert message.response_metadata.get("output_version") == "v1"970971    async def test_astream_events_v3(self, model: BaseChatModel) -> None:972        """Test that `await model.astream_events("Hello", version="v3")` works.973974        Async counterpart to `test_stream_events_v3`. Exercises the975        `AsyncChatModelStream` path end-to-end: the background producer task,976        replay-buffer-backed event iteration, and the awaitable `output`977        projection.978979        ??? question "Troubleshooting"980981            First, debug982            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_astream`.983            If `test_astream` passes but this does not, inspect the raised984            lifecycle violation; it identifies the event index and the rule985            broken.986        """987        stream = await model.astream_events("Hello", version="v3")988        assert isinstance(stream, AsyncChatModelStream)989990        events = [event async for event in stream]991        assert len(events) > 0992        assert_valid_event_stream(events)993994        message = await stream.output995        assert isinstance(message, AIMessage)996        assert message.content997        assert message.text998        assert any(block["type"] == "text" for block in message.content_blocks)999        assert message.response_metadata.get("output_version") == "v1"10001001    def test_invoke_with_model_override(self, model: BaseChatModel) -> None:1002        """Test that model name can be overridden at invoke time via kwargs.10031004        This enables dynamic model selection without creating new instances,1005        which is useful for fallback strategies, A/B testing, or cost optimization.10061007        Test is skipped if `supports_model_override` is `False`.10081009        ??? question "Troubleshooting"10101011            If this test fails, ensure that your `_generate` method passes1012            `**kwargs` through to the API request payload in a way that allows1013            the `model` parameter to be overridden.10141015            For example:1016            ```python1017            def _get_request_payload(self, ..., **kwargs) -> dict:1018                return {1019                    "model": self.model,1020                    ...1021                    **kwargs,  # kwargs should come last to allow overrides1022                }1023            ```1024        """1025        if not self.supports_model_override:1026            pytest.skip("Model override not supported.")10271028        override_model = self.model_override_value1029        if not override_model:1030            pytest.skip("model_override_value not specified.")10311032        result = model.invoke("Hello", model=override_model)1033        assert result is not None1034        assert isinstance(result, AIMessage)10351036        # Verify the overridden model was used1037        model_name = result.response_metadata.get("model_name")1038        assert model_name is not None, "model_name not found in response_metadata"1039        assert override_model in model_name, (1040            f"Expected model '{override_model}' but got '{model_name}'"1041        )10421043    async def test_ainvoke_with_model_override(self, model: BaseChatModel) -> None:1044        """Test that model name can be overridden at ainvoke time via kwargs.10451046        Test is skipped if `supports_model_override` is `False`.10471048        ??? question "Troubleshooting"10491050            See troubleshooting for `test_invoke_with_model_override`.1051        """1052        if not self.supports_model_override:1053            pytest.skip("Model override not supported.")10541055        override_model = self.model_override_value1056        if not override_model:1057            pytest.skip("model_override_value not specified.")10581059        result = await model.ainvoke("Hello", model=override_model)1060        assert result is not None1061        assert isinstance(result, AIMessage)10621063        # Verify the overridden model was used1064        model_name = result.response_metadata.get("model_name")1065        assert model_name is not None, "model_name not found in response_metadata"1066        assert override_model in model_name, (1067            f"Expected model '{override_model}' but got '{model_name}'"1068        )10691070    def test_stream_with_model_override(self, model: BaseChatModel) -> None:1071        """Test that model name can be overridden at stream time via kwargs.10721073        Test is skipped if `supports_model_override` is `False`.10741075        ??? question "Troubleshooting"10761077            See troubleshooting for `test_invoke_with_model_override`.1078        """1079        if not self.supports_model_override:1080            pytest.skip("Model override not supported.")10811082        override_model = self.model_override_value1083        if not override_model:1084            pytest.skip("model_override_value not specified.")10851086        full: AIMessageChunk | None = None1087        for chunk in model.stream("Hello", model=override_model):1088            assert isinstance(chunk, AIMessageChunk)1089            full = chunk if full is None else full + chunk10901091        assert full is not None10921093        # Verify the overridden model was used1094        model_name = full.response_metadata.get("model_name")1095        assert model_name is not None, "model_name not found in response_metadata"1096        assert override_model in model_name, (1097            f"Expected model '{override_model}' but got '{model_name}'"1098        )10991100    async def test_astream_with_model_override(self, model: BaseChatModel) -> None:1101        """Test that model name can be overridden at astream time via kwargs.11021103        Test is skipped if `supports_model_override` is `False`.11041105        ??? question "Troubleshooting"11061107            See troubleshooting for `test_invoke_with_model_override`.1108        """1109        if not self.supports_model_override:1110            pytest.skip("Model override not supported.")11111112        override_model = self.model_override_value1113        if not override_model:1114            pytest.skip("model_override_value not specified.")11151116        full: AIMessageChunk | None = None1117        async for chunk in model.astream("Hello", model=override_model):1118            assert isinstance(chunk, AIMessageChunk)1119            full = chunk if full is None else full + chunk11201121        assert full is not None11221123        # Verify the overridden model was used1124        model_name = full.response_metadata.get("model_name")1125        assert model_name is not None, "model_name not found in response_metadata"1126        assert override_model in model_name, (1127            f"Expected model '{override_model}' but got '{model_name}'"1128        )11291130    def test_batch(self, model: BaseChatModel) -> None:1131        """Test to verify that `model.batch([messages])` works.11321133        This should pass for all integrations. Tests the model's ability to process1134        multiple prompts in a single batch.11351136        ??? question "Troubleshooting"11371138            First, debug1139            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1140            because `batch` has a default implementation that calls `invoke` for1141            each message in the batch.11421143            If that test passes but not this one, you should make sure your `batch`1144            method does not raise any exceptions, and that it returns a list of valid1145            `AIMessage` objects.11461147        """1148        batch_results = model.batch(["Hello", "Hey"])1149        assert batch_results is not None1150        assert isinstance(batch_results, list)1151        assert len(batch_results) == 21152        for result in batch_results:1153            assert result is not None1154            assert isinstance(result, AIMessage)1155            assert isinstance(result.text, str)1156            assert len(result.content) > 011571158    async def test_abatch(self, model: BaseChatModel) -> None:1159        """Test to verify that `await model.abatch([messages])` works.11601161        This should pass for all integrations. Tests the model's ability to process1162        multiple prompts in a single batch asynchronously.11631164        ??? question "Troubleshooting"11651166            First, debug1167            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_batch`1168            and1169            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`1170            because `abatch` has a default implementation that calls `ainvoke` for1171            each message in the batch.11721173            If those tests pass but not this one, you should make sure your `abatch`1174            method does not raise any exceptions, and that it returns a list of valid1175            `AIMessage` objects.11761177        """1178        batch_results = await model.abatch(["Hello", "Hey"])1179        assert batch_results is not None1180        assert isinstance(batch_results, list)1181        assert len(batch_results) == 21182        for result in batch_results:1183            assert result is not None1184            assert isinstance(result, AIMessage)1185            assert isinstance(result.text, str)1186            assert len(result.content) > 011871188    def test_conversation(self, model: BaseChatModel) -> None:1189        """Test to verify that the model can handle multi-turn conversations.11901191        This should pass for all integrations. Tests the model's ability to process1192        a sequence of alternating `HumanMessage` and `AIMessage` objects as context for1193        generating the next response.11941195        ??? question "Troubleshooting"11961197            First, debug1198            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1199            because this test also uses `model.invoke`.12001201            If that test passes but not this one, you should verify that:12021203            1. Your model correctly processes the message history1204            2. The model maintains appropriate context from previous messages1205            3. The response is a valid `langchain_core.messages.AIMessage`12061207        """1208        messages = [1209            HumanMessage("hello"),1210            AIMessage("hello"),1211            HumanMessage("how are you"),1212        ]12131214        result = model.invoke(messages)1215        assert result is not None1216        assert isinstance(result, AIMessage)1217        assert isinstance(result.text, str)1218        assert len(result.content) > 012191220    def test_double_messages_conversation(self, model: BaseChatModel) -> None:1221        """Test to verify that the model can handle double-message conversations.12221223        This should pass for all integrations. Tests the model's ability to process1224        a sequence of double-system, double-human, and double-ai messages as context1225        for generating the next response.12261227        ??? question "Troubleshooting"12281229            First, debug1230            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1231            because this test also uses `model.invoke`.12321233            Second, debug1234            `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_conversation`1235            because this test is the "basic case" without double messages.12361237            If that test passes those but not this one, you should verify that:12381239            1. Your model API can handle double messages, or the integration should1240                merge messages before sending them to the API.1241            2. The response is a valid `langchain_core.messages.AIMessage`12421243        """1244        messages = [1245            SystemMessage("hello"),1246            SystemMessage("hello"),1247            HumanMessage("hello"),1248            HumanMessage("hello"),1249            AIMessage("hello"),1250            AIMessage("hello"),1251            HumanMessage("how are you"),1252        ]12531254        result = model.invoke(messages)1255        assert result is not None1256        assert isinstance(result, AIMessage)1257        assert isinstance(result.text, str)1258        assert len(result.content) > 012591260    def test_usage_metadata(self, model: BaseChatModel) -> None:1261        """Test to verify that the model returns correct usage metadata.12621263        This test is optional and should be skipped if the model does not return1264        usage metadata (see configuration below).12651266        !!! warning "Behavior changed in `langchain-tests` 0.3.17"12671268            Additionally check for the presence of `model_name` in the response1269            metadata, which is needed for usage tracking in callback handlers.12701271        ??? note "Configuration"12721273            By default, this test is run.12741275            To disable this feature, set `returns_usage_metadata` to `False` in your1276            test class:12771278            ```python1279            class TestMyChatModelIntegration(ChatModelIntegrationTests):1280                @property1281                def returns_usage_metadata(self) -> bool:1282                    return False1283            ```12841285            This test can also check the format of specific kinds of usage metadata1286            based on the `supported_usage_metadata_details` property.12871288            This property should be configured as follows with the types of tokens that1289            the model supports tracking:12901291            ```python1292            class TestMyChatModelIntegration(ChatModelIntegrationTests):1293                @property1294                def supported_usage_metadata_details(self) -> dict:1295                    return {1296                        "invoke": [1297                            "audio_input",1298                            "audio_output",1299                            "reasoning_output",1300                            "cache_read_input",1301                            "cache_creation_input",1302                        ],1303                        "stream": [1304                            "audio_input",1305                            "audio_output",1306                            "reasoning_output",1307                            "cache_read_input",1308                            "cache_creation_input",1309                        ],1310                    }1311            ```13121313        ??? question "Troubleshooting"13141315            If this test fails, first verify that your model returns1316            `langchain_core.messages.ai.UsageMetadata` dicts1317            attached to the returned `AIMessage` object in `_generate`:13181319            ```python1320            return ChatResult(1321                generations=[1322                    ChatGeneration(1323                        message=AIMessage(1324                            content="Output text",1325                            usage_metadata={1326                                "input_tokens": 350,1327                                "output_tokens": 240,1328                                "total_tokens": 590,1329                                "input_token_details": {1330                                    "audio": 10,1331                                    "cache_creation": 200,1332                                    "cache_read": 100,1333                                },1334                                "output_token_details": {1335                                    "audio": 10,1336                                    "reasoning": 200,1337                                },1338                            },1339                        )1340                    )1341                ]1342            )1343            ```13441345            Check also that the response includes a `model_name` key in its1346            `usage_metadata`.1347        """1348        if not self.returns_usage_metadata:1349            pytest.skip("Not implemented.")13501351        result = model.invoke("Hello")1352        assert result is not None1353        assert isinstance(result, AIMessage)13541355        assert result.usage_metadata is not None1356        assert isinstance(result.usage_metadata["input_tokens"], int)1357        assert isinstance(result.usage_metadata["output_tokens"], int)1358        assert isinstance(result.usage_metadata["total_tokens"], int)13591360        # Check model_name is in response_metadata1361        # Needed for langchain_core.callbacks.usage1362        model_name = result.response_metadata.get("model_name")1363        assert isinstance(model_name, str)1364        assert model_name, "model_name is empty"13651366        # `input_tokens` is the total, possibly including other unclassified or1367        # system-level tokens.1368        if "audio_input" in self.supported_usage_metadata_details["invoke"]:1369            # Checks if the specific chat model integration being tested has declared1370            # that it supports reporting token counts specifically for `audio_input`1371            msg = self.invoke_with_audio_input()  # To be implemented in test subclass1372            assert (usage_metadata := msg.usage_metadata) is not None1373            assert (1374                input_token_details := usage_metadata.get("input_token_details")1375            ) is not None1376            assert isinstance(input_token_details.get("audio"), int)1377            # Asserts that total input tokens are at least the sum of the token counts1378            assert usage_metadata.get("input_tokens", 0) >= sum(1379                v for v in input_token_details.values() if isinstance(v, int)1380            )1381        if "audio_output" in self.supported_usage_metadata_details["invoke"]:1382            msg = self.invoke_with_audio_output()1383            assert (usage_metadata := msg.usage_metadata) is not None1384            assert (1385                output_token_details := usage_metadata.get("output_token_details")1386            ) is not None1387            assert isinstance(output_token_details.get("audio"), int)1388            # Asserts that total output tokens are at least the sum of the token counts1389            assert usage_metadata.get("output_tokens", 0) >= sum(1390                v for v in output_token_details.values() if isinstance(v, int)1391            )1392        if "reasoning_output" in self.supported_usage_metadata_details["invoke"]:1393            msg = self.invoke_with_reasoning_output()1394            assert (usage_metadata := msg.usage_metadata) is not None1395            assert (1396                output_token_details := usage_metadata.get("output_token_details")1397            ) is not None1398            assert isinstance(output_token_details.get("reasoning"), int)1399            # Asserts that total output tokens are at least the sum of the token counts1400            assert usage_metadata.get("output_tokens", 0) >= sum(1401                v for v in output_token_details.values() if isinstance(v, int)1402            )1403        if "cache_read_input" in self.supported_usage_metadata_details["invoke"]:1404            msg = self.invoke_with_cache_read_input()1405            usage_metadata = msg.usage_metadata1406            assert usage_metadata is not None1407            input_token_details = usage_metadata.get("input_token_details")1408            assert input_token_details is not None1409            cache_read_tokens = input_token_details.get("cache_read")1410            assert isinstance(cache_read_tokens, int)1411            assert cache_read_tokens >= 01412            # Asserts that total input tokens are at least the sum of the token counts1413            total_detailed_tokens = sum(1414                v for v in input_token_details.values() if isinstance(v, int) and v >= 01415            )1416            input_tokens = usage_metadata.get("input_tokens", 0)1417            assert isinstance(input_tokens, int)1418            assert input_tokens >= total_detailed_tokens1419        if "cache_creation_input" in self.supported_usage_metadata_details["invoke"]:1420            msg = self.invoke_with_cache_creation_input()1421            usage_metadata = msg.usage_metadata1422            assert usage_metadata is not None1423            input_token_details = usage_metadata.get("input_token_details")1424            assert input_token_details is not None1425            cache_creation_tokens = input_token_details.get("cache_creation")1426            assert isinstance(cache_creation_tokens, int)1427            assert cache_creation_tokens >= 01428            # Asserts that total input tokens are at least the sum of the token counts1429            total_detailed_tokens = sum(1430                v for v in input_token_details.values() if isinstance(v, int) and v >= 01431            )1432            input_tokens = usage_metadata.get("input_tokens", 0)1433            assert isinstance(input_tokens, int)1434            assert input_tokens >= total_detailed_tokens14351436    def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:1437        """Test usage metadata in streaming mode.14381439        Test to verify that the model returns correct usage metadata in streaming mode.14401441        !!! warning "Behavior changed in `langchain-tests` 0.3.17"14421443            Additionally check for the presence of `model_name` in the response1444            metadata, which is needed for usage tracking in callback handlers.14451446        ??? note "Configuration"14471448            By default, this test is run.1449            To disable this feature, set `returns_usage_metadata` to `False` in your1450            test class:14511452            ```python1453            class TestMyChatModelIntegration(ChatModelIntegrationTests):1454                @property1455                def returns_usage_metadata(self) -> bool:1456                    return False1457            ```14581459            This test can also check the format of specific kinds of usage metadata1460            based on the `supported_usage_metadata_details` property.14611462            This property should be configured as follows with the types of tokens that1463            the model supports tracking:14641465            ```python1466            class TestMyChatModelIntegration(ChatModelIntegrationTests):1467                @property1468                def supported_usage_metadata_details(self) -> dict:1469                    return {1470                        "invoke": [1471                            "audio_input",1472                            "audio_output",1473                            "reasoning_output",1474                            "cache_read_input",1475                            "cache_creation_input",1476                        ],1477                        "stream": [1478                            "audio_input",1479                            "audio_output",1480                            "reasoning_output",1481                            "cache_read_input",1482                            "cache_creation_input",1483                        ],1484                    }1485            ```14861487        ??? question "Troubleshooting"14881489            If this test fails, first verify that your model yields1490            `langchain_core.messages.ai.UsageMetadata` dicts1491            attached to the returned `AIMessage` object in `_stream`1492            that sum up to the total usage metadata.14931494            Note that `input_tokens` should only be included on one of the chunks1495            (typically the first or the last chunk), and the rest should have `0` or1496            `None` to avoid counting input tokens multiple times.14971498            `output_tokens` typically count the number of tokens in each chunk, not1499            the sum. This test will pass as long as the sum of `output_tokens` across1500            all chunks is not `0`.15011502            ```python1503            yield ChatResult(1504                generations=[1505                    ChatGeneration(1506                        message=AIMessage(1507                            content="Output text",1508                            usage_metadata={1509                                "input_tokens": (1510                                    num_input_tokens if is_first_chunk else 01511                                ),1512                                "output_tokens": 11,1513                                "total_tokens": (1514                                    11 + num_input_tokens if is_first_chunk else 111515                                ),1516                                "input_token_details": {1517                                    "audio": 10,1518                                    "cache_creation": 200,1519                                    "cache_read": 100,1520                                },1521                                "output_token_details": {1522                                    "audio": 10,1523                                    "reasoning": 200,1524                                },1525                            },1526                        )1527                    )1528                ]1529            )1530            ```15311532            Check also that the aggregated response includes a `model_name` key1533            in its `usage_metadata`.15341535        """1536        if not self.returns_usage_metadata:1537            pytest.skip("Not implemented.")15381539        full: AIMessageChunk | None = None1540        for chunk in model.stream("Write me 2 haikus. Only include the haikus."):1541            assert isinstance(chunk, AIMessageChunk)1542            # only one chunk is allowed to set usage_metadata.input_tokens1543            # if multiple do, it's likely a bug that will result in overcounting1544            # input tokens (since the total number of input tokens applies to the full1545            # generation, not individual chunks)1546            if full and full.usage_metadata and full.usage_metadata["input_tokens"]:1547                assert (1548                    not chunk.usage_metadata or not chunk.usage_metadata["input_tokens"]1549                ), (1550                    "Only one chunk should set input_tokens,"1551                    " the rest should be 0 or None"1552                )1553            # only one chunk is allowed to set usage_metadata.model_name1554            # if multiple do, they'll be concatenated incorrectly1555            if full and full.usage_metadata and full.usage_metadata.get("model_name"):1556                assert not chunk.usage_metadata or not chunk.usage_metadata.get(1557                    "model_name"1558                ), "Only one chunk should set model_name, the rest should be None"1559            full = chunk if full is None else full + chunk15601561        assert isinstance(full, AIMessageChunk)1562        assert full.usage_metadata is not None1563        assert isinstance(full.usage_metadata["input_tokens"], int)1564        assert isinstance(full.usage_metadata["output_tokens"], int)1565        assert isinstance(full.usage_metadata["total_tokens"], int)15661567        # Check model_name is in response_metadata1568        # Needed for langchain_core.callbacks.usage1569        model_name = full.response_metadata.get("model_name")1570        assert isinstance(model_name, str)1571        assert model_name, "model_name is empty"15721573        if "audio_input" in self.supported_usage_metadata_details["stream"]:1574            msg = self.invoke_with_audio_input(stream=True)1575            assert msg.usage_metadata is not None1576            assert isinstance(1577                msg.usage_metadata.get("input_token_details", {}).get("audio"), int1578            )1579        if "audio_output" in self.supported_usage_metadata_details["stream"]:1580            msg = self.invoke_with_audio_output(stream=True)1581            assert msg.usage_metadata is not None1582            assert isinstance(1583                msg.usage_metadata.get("output_token_details", {}).get("audio"), int1584            )1585        if "reasoning_output" in self.supported_usage_metadata_details["stream"]:1586            msg = self.invoke_with_reasoning_output(stream=True)1587            assert msg.usage_metadata is not None1588            assert isinstance(1589                msg.usage_metadata.get("output_token_details", {}).get("reasoning"), int1590            )1591        if "cache_read_input" in self.supported_usage_metadata_details["stream"]:1592            msg = self.invoke_with_cache_read_input(stream=True)1593            assert msg.usage_metadata is not None1594            assert isinstance(1595                msg.usage_metadata.get("input_token_details", {}).get("cache_read"), int1596            )1597        if "cache_creation_input" in self.supported_usage_metadata_details["stream"]:1598            msg = self.invoke_with_cache_creation_input(stream=True)1599            assert msg.usage_metadata is not None1600            assert isinstance(1601                msg.usage_metadata.get("input_token_details", {}).get("cache_creation"),1602                int,1603            )16041605    def test_stop_sequence(self, model: BaseChatModel) -> None:1606        """Test that model does not fail when invoked with the `stop` parameter.16071608        The `stop` parameter is a standard parameter for stopping generation at a1609        certain token.16101611        [More on standard parameters](https://python.langchain.com/docs/concepts/chat_models/#standard-parameters).16121613        This should pass for all integrations.16141615        ??? question "Troubleshooting"16161617            If this test fails, check that the function signature for `_generate`1618            (as well as `_stream` and async variants) accepts the `stop` parameter:16191620            ```python1621            def _generate(1622                self,1623                messages: List[BaseMessage],1624                stop: list[str] | None = None,1625                run_manager: CallbackManagerForLLMRun | None = None,1626                **kwargs: Any,1627            ) -> ChatResult:16281629            ```1630        """1631        result = model.invoke("hi", stop=["you"])1632        assert isinstance(result, AIMessage)16331634        custom_model = self.chat_model_class(1635            **{1636                **self.chat_model_params,1637                "stop": ["you"],1638            }1639        )1640        result = custom_model.invoke("hi")1641        assert isinstance(result, AIMessage)16421643    @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)1644    def test_tool_calling(self, model: BaseChatModel) -> None:1645        """Test that the model generates tool calls.16461647        This test is skipped if the `has_tool_calling` property on the test class is1648        set to `False`.16491650        This test is optional and should be skipped if the model does not support1651        tool calling (see configuration below).16521653        ??? note "Configuration"16541655            To disable tool calling tests, set `has_tool_calling` to `False` in your1656            test class:16571658            ```python1659            class TestMyChatModelIntegration(ChatModelIntegrationTests):1660                @property1661                def has_tool_calling(self) -> bool:1662                    return False1663            ```16641665        ??? question "Troubleshooting"16661667            If this test fails, check that `bind_tools` is implemented to correctly1668            translate LangChain tool objects into the appropriate schema for your1669            chat model.16701671            This test may fail if the chat model does not support a `tool_choice`1672            parameter. This parameter can be used to force a tool call. If1673            `tool_choice` is not supported and the model consistently fails this1674            test, you can `xfail` the test:16751676            ```python1677            @pytest.mark.xfail(reason=("Does not support tool_choice."))1678            def test_tool_calling(self, model: BaseChatModel) -> None:1679                super().test_tool_calling(model)1680            ```16811682            Otherwise, in the case that only one tool is bound, ensure that1683            `tool_choice` supports the string `'any'` to force calling that tool.16841685            If `tool_call_streaming = true` is set in the model's profile1686            augmentations, individual chunks are also validated to contain1687            `tool_call_chunk` blocks in `content_blocks`.16881689        """1690        if not self.has_tool_calling:1691            pytest.skip("Test requires tool calling.")16921693        tool_choice_value = None if not self.has_tool_choice else "any"1694        model_with_tools = model.bind_tools(1695            [magic_function], tool_choice=tool_choice_value1696        )16971698        # Test invoke1699        query = "What is the value of magic_function(3)? Use the tool."1700        result = model_with_tools.invoke(query)1701        _validate_tool_call_message(result)17021703        tool_call_streaming = (1704            model.profile.get("tool_call_streaming", False) if model.profile else False1705        )17061707        # Test stream1708        full: BaseMessage | None = None1709        found_tool_call_chunk = False1710        for chunk in model_with_tools.stream(query):1711            if tool_call_streaming and isinstance(chunk, AIMessageChunk):1712                found_tool_call_chunk |= _validate_tool_call_chunk(chunk)1713            full = chunk if full is None else full + chunk  # type: ignore[assignment]1714        assert isinstance(full, AIMessage)1715        _validate_tool_call_message(full)17161717        if tool_call_streaming:1718            assert found_tool_call_chunk, (1719                "Expected to find 'tool_call_chunk' blocks in content_blocks of at "1720                "least one chunk during streaming, but none were found. If this "1721                "model does not support streaming tool calls, set "1722                "tool_call_streaming=false in the model's profile augmentations."1723            )17241725    async def test_tool_calling_async(self, model: BaseChatModel) -> None:1726        """Test that the model generates tool calls.17271728        This test is skipped if the `has_tool_calling` property on the test class is1729        set to `False`.17301731        This test is optional and should be skipped if the model does not support1732        tool calling (see configuration below).17331734        ??? note "Configuration"17351736            To disable tool calling tests, set `has_tool_calling` to `False` in your1737            test class:17381739            ```python1740            class TestMyChatModelIntegration(ChatModelIntegrationTests):1741                @property1742                def has_tool_calling(self) -> bool:1743                    return False1744            ```17451746        ??? question "Troubleshooting"17471748            If this test fails, check that `bind_tools` is implemented to correctly1749            translate LangChain tool objects into the appropriate schema for your1750            chat model.17511752            This test may fail if the chat model does not support a `tool_choice`1753            parameter. This parameter can be used to force a tool call. If1754            `tool_choice` is not supported and the model consistently fails this1755            test, you can `xfail` the test:17561757            ```python1758            @pytest.mark.xfail(reason=("Does not support tool_choice."))1759            async def test_tool_calling_async(self, model: BaseChatModel) -> None:1760                await super().test_tool_calling_async(model)1761            ```17621763            Otherwise, in the case that only one tool is bound, ensure that1764            `tool_choice` supports the string `'any'` to force calling that tool.17651766            See `test_tool_calling` for `tool_call_streaming` profile configuration.17671768        """1769        if not self.has_tool_calling:1770            pytest.skip("Test requires tool calling.")17711772        tool_choice_value = None if not self.has_tool_choice else "any"1773        model_with_tools = model.bind_tools(1774            [magic_function], tool_choice=tool_choice_value1775        )17761777        # Test ainvoke1778        query = "What is the value of magic_function(3)? Use the tool."1779        result = await model_with_tools.ainvoke(query)1780        _validate_tool_call_message(result)17811782        tool_call_streaming = (1783            model.profile.get("tool_call_streaming", False) if model.profile else False1784        )17851786        # Test astream1787        full: BaseMessage | None = None1788        found_tool_call_chunk = False1789        async for chunk in model_with_tools.astream(query):1790            if tool_call_streaming and isinstance(chunk, AIMessageChunk):1791                found_tool_call_chunk |= _validate_tool_call_chunk(chunk)1792            full = chunk if full is None else full + chunk  # type: ignore[assignment]1793        assert isinstance(full, AIMessage)1794        _validate_tool_call_message(full)17951796        if tool_call_streaming:1797            assert found_tool_call_chunk, (1798                "Expected to find 'tool_call_chunk' blocks in content_blocks of at "1799                "least one chunk during streaming, but none were found. If this "1800                "model does not support streaming tool calls, set "1801                "tool_call_streaming=false in the model's profile augmentations."1802            )18031804    def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:1805        """Test bind runnables as tools.18061807        Test that the model generates tool calls for tools that are derived from1808        LangChain runnables. This test is skipped if the `has_tool_calling` property1809        on the test class is set to `False`.18101811        This test is optional and should be skipped if the model does not support1812        tool calling (see configuration below).18131814        ??? note "Configuration"18151816            To disable tool calling tests, set `has_tool_calling` to `False` in your1817            test class:18181819            ```python1820            class TestMyChatModelIntegration(ChatModelIntegrationTests):1821                @property1822                def has_tool_calling(self) -> bool:1823                    return False1824            ```18251826        ??? question "Troubleshooting"18271828            If this test fails, check that `bind_tools` is implemented to correctly1829            translate LangChain tool objects into the appropriate schema for your1830            chat model.18311832            This test may fail if the chat model does not support a `tool_choice`1833            parameter. This parameter can be used to force a tool call. If1834            `tool_choice` is not supported, set `has_tool_choice` to `False` in1835            your test class:18361837            ```python1838            @property1839            def has_tool_choice(self) -> bool:1840                return False1841            ```18421843        """1844        if not self.has_tool_calling:1845            pytest.skip("Test requires tool calling.")18461847        prompt = ChatPromptTemplate.from_messages(1848            [("human", "Hello. Please respond in the style of {answer_style}.")]1849        )1850        llm = GenericFakeChatModel(messages=iter(["hello matey"]))1851        chain = prompt | llm | StrOutputParser()1852        tool_ = chain.as_tool(1853            name="greeting_generator",1854            description="Generate a greeting in a particular style of speaking.",1855        )1856        if self.has_tool_choice:1857            tool_choice: str | None = "any"1858        else:1859            tool_choice = None1860        model_with_tools = model.bind_tools([tool_], tool_choice=tool_choice)1861        query = "Using the tool, generate a Pirate greeting."1862        result = model_with_tools.invoke(query)1863        assert isinstance(result, AIMessage)1864        assert result.tool_calls1865        tool_call = result.tool_calls[0]1866        assert tool_call["args"].get("answer_style")1867        assert tool_call.get("type") == "tool_call"18681869    def test_tool_message_histories_string_content(1870        self, model: BaseChatModel, my_adder_tool: BaseTool1871    ) -> None:1872        """Test that message histories are compatible with string tool contents.18731874        For instance with OpenAI format contents.1875        If a model passes this test, it should be compatible1876        with messages generated from providers following OpenAI format.18771878        This test should be skipped if the model does not support tool calling1879        (see configuration below).18801881        ??? note "Configuration"18821883            To disable tool calling tests, set `has_tool_calling` to `False` in your1884            test class:18851886            ```python1887            class TestMyChatModelIntegration(ChatModelIntegrationTests):1888                @property1889                def has_tool_calling(self) -> bool:1890                    return False1891            ```18921893        ??? question "Troubleshooting"18941895            If this test fails, check that:18961897            1. The model can correctly handle message histories that include1898                `AIMessage` objects with `""` content.1899            2. The `tool_calls` attribute on `AIMessage` objects is correctly1900                handled and passed to the model in an appropriate format.1901            3. The model can correctly handle `ToolMessage` objects with string1902                content and arbitrary string values for `tool_call_id`.19031904            You can `xfail` the test if tool calling is implemented but this format1905            is not supported.19061907            ```python1908            @pytest.mark.xfail(reason=("Not implemented."))1909            def test_tool_message_histories_string_content(self, *args: Any) -> None:1910                super().test_tool_message_histories_string_content(*args)1911            ```1912        """1913        if not self.has_tool_calling:1914            pytest.skip("Test requires tool calling.")19151916        model_with_tools = model.bind_tools([my_adder_tool])1917        function_name = "my_adder_tool"1918        function_args = {"a": 1, "b": 2}19191920        messages_string_content = [1921            HumanMessage("What is 1 + 2"),1922            # string content (e.g. OpenAI)1923            AIMessage(1924                "",1925                tool_calls=[1926                    {1927                        "name": function_name,1928                        "args": function_args,1929                        "id": "abc123",1930                        "type": "tool_call",1931                    },1932                ],1933            ),1934            ToolMessage(1935                json.dumps({"result": 3}),1936                name=function_name,1937                tool_call_id="abc123",1938            ),1939        ]1940        result_string_content = model_with_tools.invoke(messages_string_content)1941        assert isinstance(result_string_content, AIMessage)19421943    def test_tool_message_histories_list_content(1944        self,1945        model: BaseChatModel,1946        my_adder_tool: BaseTool,1947    ) -> None:1948        """Test that message histories are compatible with list tool contents.19491950        For instance with Anthropic format contents.19511952        These message histories will include `AIMessage` objects with "tool use" and1953        content blocks, e.g.,19541955        ```python1956        [1957            {"type": "text", "text": "Hmm let me think about that"},1958            {1959                "type": "tool_use",1960                "input": {"fav_color": "green"},1961                "id": "foo",1962                "name": "color_picker",1963            },1964        ]1965        ```19661967        This test should be skipped if the model does not support tool calling1968        (see configuration below).19691970        ??? note "Configuration"19711972            To disable tool calling tests, set `has_tool_calling` to `False` in your1973            test class:19741975            ```python1976            class TestMyChatModelIntegration(ChatModelIntegrationTests):1977                @property1978                def has_tool_calling(self) -> bool:1979                    return False1980            ```19811982        ??? question "Troubleshooting"19831984            If this test fails, check that:19851986            1. The model can correctly handle message histories that include1987                `AIMessage` objects with list content.1988            2. The `tool_calls` attribute on `AIMessage` objects is correctly1989                handled and passed to the model in an appropriate format.1990            3. The model can correctly handle ToolMessage objects with string content1991                and arbitrary string values for `tool_call_id`.19921993            You can `xfail` the test if tool calling is implemented but this format1994            is not supported.19951996            ```python1997            @pytest.mark.xfail(reason=("Not implemented."))1998            def test_tool_message_histories_list_content(self, *args: Any) -> None:1999                super().test_tool_message_histories_list_content(*args)2000            ```

Findings

✓ No findings reported for this file.

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.