1"""Integration tests for chat models."""23from __future__ import annotations45import base646import json7import os8import warnings9from typing import TYPE_CHECKING, Annotated, Any, Literal10from unittest.mock import MagicMock1112import httpx13import pytest14from langchain_core.callbacks import BaseCallbackHandler15from langchain_core.language_models import BaseChatModel, GenericFakeChatModel16from langchain_core.language_models.chat_model_stream import (17 AsyncChatModelStream,18 ChatModelStream,19)20from langchain_core.messages import (21 AIMessage,22 AIMessageChunk,23 BaseMessage,24 HumanMessage,25 SystemMessage,26 ToolMessage,27)28from langchain_core.output_parsers import StrOutputParser29from langchain_core.prompts import ChatPromptTemplate30from langchain_core.tools import BaseTool, tool31from langchain_core.utils.function_calling import (32 convert_to_json_schema,33 tool_example_to_messages,34)35from pydantic import BaseModel, Field36from pydantic.v1 import BaseModel as BaseModelV137from pydantic.v1 import Field as FieldV138from typing_extensions import TypedDict, override3940from langchain_tests.unit_tests.chat_models import ChatModelTests41from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION42from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream4344if TYPE_CHECKING:45 from pytest_benchmark.fixture import (46 BenchmarkFixture,47 )48 from vcr.cassette import Cassette495051def _get_joke_class( # noqa: RET50352 schema_type: Literal["pydantic", "typeddict", "json_schema"],53) -> Any:54 class Joke(BaseModel):55 """Joke to tell user."""5657 setup: str = Field(description="question to set up a joke")58 punchline: str = Field(description="answer to resolve the joke")5960 def validate_joke(result: Any) -> bool:61 return isinstance(result, Joke)6263 class JokeDict(TypedDict):64 """Joke to tell user."""6566 setup: Annotated[str, ..., "question to set up a joke"]67 punchline: Annotated[str, ..., "answer to resolve the joke"]6869 def 
validate_joke_dict(result: Any) -> bool:70 return all(key in {"setup", "punchline"} for key in result)7172 if schema_type == "pydantic":73 return Joke, validate_joke7475 if schema_type == "typeddict":76 return JokeDict, validate_joke_dict7778 if schema_type == "json_schema":79 return Joke.model_json_schema(), validate_joke_dict808182class _TestCallbackHandler(BaseCallbackHandler):83 options: list[dict[str, Any] | None]8485 def __init__(self) -> None:86 super().__init__()87 self.options = []8889 @override90 def on_chat_model_start(91 self,92 serialized: Any,93 messages: Any,94 *,95 options: dict[str, Any] | None = None,96 **kwargs: Any,97 ) -> None:98 self.options.append(options)99100101class _MagicFunctionSchema(BaseModel):102 input: int = Field(..., gt=-1000, lt=1000)103104105@tool(args_schema=_MagicFunctionSchema)106def magic_function(_input: int) -> int:107 """Apply a magic function to an input."""108 return _input + 2109110111@tool112def magic_function_no_args() -> int:113 """Calculate a magic function."""114 return 5115116117def _validate_tool_call_message(message: BaseMessage) -> None:118 assert isinstance(message, AIMessage)119 assert len(message.tool_calls) == 1120121 tool_call = message.tool_calls[0]122 assert tool_call["name"] == "magic_function"123 assert tool_call["args"] == {"input": 3}124 assert tool_call["id"] is not None125 assert tool_call.get("type") == "tool_call"126127 content_tool_calls = [128 block for block in message.content_blocks if block["type"] == "tool_call"129 ]130 assert len(content_tool_calls) == 1131 content_tool_call = content_tool_calls[0]132 assert content_tool_call["name"] == "magic_function"133 assert content_tool_call["args"] == {"input": 3}134 assert content_tool_call["id"] is not None135136137def _validate_tool_call_chunk(chunk: AIMessageChunk) -> bool:138 """Check whether a streaming chunk contains valid `tool_call_chunk` blocks.139140 Returns:141 `True` if at least one `tool_call_chunk` block was found.142 """143 found = 
def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
    """Assert `message` is an `AIMessage` with one `magic_function_no_args` call.

    The single tool call must have empty `args`, a non-`None` id, and the
    `tool_call` type marker.
    """
    assert isinstance(message, AIMessage)
    assert len(message.tool_calls) == 1

    tool_call = message.tool_calls[0]
    assert tool_call["name"] == "magic_function_no_args"
    assert tool_call["args"] == {}
    assert tool_call["id"] is not None
    assert tool_call.get("type") == "tool_call"


def _get_base64_from_url(url: str) -> str:
    """Download `url` and return the response body as a base64 string.

    The `LANGCHAIN_TESTS_USER_AGENT` environment variable, when set, is sent
    as the `User-Agent` header; when it is unset a warning is emitted and the
    request goes out without that header.

    Args:
        url: Location of the resource to download.

    Returns:
        The response body, base64-encoded and decoded as UTF-8 text.

    Raises:
        httpx.HTTPStatusError: If the server answers with a non-success status.
    """
    user_agent = os.environ.get("LANGCHAIN_TESTS_USER_AGENT")
    if not user_agent:
        warning_message = (
            "LANGCHAIN_TESTS_USER_AGENT environment variable not set. "
            "langchain-tests pulls (CC0 License) audio data from wikimedia.org. "
            "Consider setting a user agent to identify your requests. See "
            "https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy"
        )
        warnings.warn(warning_message, stacklevel=2)
    headers = {"User-Agent": user_agent} if user_agent else {}
    response = httpx.get(url, headers=headers, timeout=10.0)
    # Fail loudly on an error response instead of base64-encoding an error
    # page and handing it to the model as if it were the requested media.
    response.raise_for_status()
    return base64.b64encode(response.content).decode("utf-8")


# The docstring below is left exactly as written; only this comment is added.
@tool
def unicode_customer(customer_name: str, description: str) -> str:
    """Tool for creating a customer with Unicode name.

    Args:
        customer_name: The customer's name in their native language.
        description: Description of the customer.

    Returns:
        A confirmation message about the customer creation.

    """
    return f"Created customer: {customer_name} - {description}"
note221 API references for individual test methods include troubleshooting tips.222223224 Test subclasses **must** implement the following two properties:225226 `chat_model_class`: The chat model class to test, e.g., `ChatParrotLink`.227228 ```python229 @property230 def chat_model_class(self) -> Type[ChatParrotLink]:231 return ChatParrotLink232 ```233234 `chat_model_params`: Initialization parameters for the chat model.235236 ```python237 @property238 def chat_model_params(self) -> dict:239 return {"model": "bird-brain-001", "temperature": 0}240 ```241242 In addition, test subclasses can control what features are tested (such as tool243 calling or multi-modality) by selectively overriding the following properties.244245 Expand to see details:246247 ???+ info "`has_tool_calling`"248249 Boolean property indicating whether the chat model supports tool calling.250251 By default, this is determined by whether the chat model's `bind_tools` method252 is overridden. It typically does not need to be overridden on the test class.253254 ```python255 @property256 def has_tool_calling(self) -> bool:257 return True258 ```259260 ??? info "`has_tool_choice`"261262 Boolean property indicating whether the chat model supports forcing tool263 calling via a `tool_choice` parameter.264265 By default, this is determined by whether the parameter is included in the266 signature for the corresponding `bind_tools` method.267268 If `True`, the minimum requirement for this feature is that269 `tool_choice='any'` will force a tool call, and `tool_choice=<tool name>`270 will force a call to a specific tool.271272 ```python273 @property274 def has_tool_choice(self) -> bool:275 return False276 ```277278 ??? info "`has_structured_output`"279280 Boolean property indicating whether the chat model supports structured281 output.282283 By default, this is determined by whether the chat model's284 `with_structured_output` method is overridden. 
If the base implementation is285 intended to be used, this method should be overridden.286287 See docs for [Structured output](https://docs.langchain.com/oss/python/langchain/structured-output).288289 ```python290 @property291 def has_structured_output(self) -> bool:292 return True293 ```294295 ??? info "`structured_output_kwargs`"296297 Dict property specifying additional kwargs to pass to298 `with_structured_output()` when running structured output tests.299300 Override this to customize how your model generates structured output.301302 The most common use case is specifying the `method` parameter:303304 - `'function_calling'`: Uses tool/function calling to enforce the schema.305 - `'json_mode'`: Uses the model's JSON mode.306 - `'json_schema'`: Uses native JSON schema support (e.g., OpenAI's structured307 outputs).308309 ```python310 @property311 def structured_output_kwargs(self) -> dict:312 return {"method": "json_schema"}313 ```314315 ??? info "`supports_json_mode`"316317 Boolean property indicating whether the chat model supports318 `method='json_mode'` in `with_structured_output`.319320 Defaults to `False`.321322 JSON mode constrains the model to output valid JSON without enforcing323 a specific schema (unlike `'function_calling'` or `'json_schema'` methods).324325 When using JSON mode, you must prompt the model to output JSON in your326 message.327328 !!! example329330 ```python331 structured_llm = llm.with_structured_output(MySchema, method="json_mode")332 structured_llm.invoke("... Return the result as JSON.")333 ```334335 See docs for [Structured output](https://docs.langchain.com/oss/python/langchain/structured-output).336337 ```python338 @property339 def supports_json_mode(self) -> bool:340 return True341 ```342343 ??? 
info "`supports_image_inputs`"344345 Boolean property indicating whether the chat model supports image inputs.346347 Defaults to `False`.348349 If set to `True`, the chat model will be tested by inputting an350 `ImageContentBlock` with the shape:351352 ```python353 {354 "type": "image",355 "base64": "<base64 image data>",356 "mime_type": "image/jpeg", # or appropriate MIME type357 }358 ```359360 In addition to OpenAI-style content blocks:361362 ```python363 {364 "type": "image_url",365 "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},366 }367 ```368369 See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).370371 ```python372 @property373 def supports_image_inputs(self) -> bool:374 return True375 ```376377 ??? info "`supports_image_urls`"378379 Boolean property indicating whether the chat model supports image inputs from380 URLs.381382 Defaults to `False`.383384 If set to `True`, the chat model will be tested using content blocks of the385 form386387 ```python388 {389 "type": "image",390 "url": "https://...",391 }392 ```393394 See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).395396 ```python397 @property398 def supports_image_urls(self) -> bool:399 return True400 ```401402 ??? info "`supports_image_tool_message`"403404 Boolean property indicating whether the chat model supports a `ToolMessage`405 that includes image content, e.g. 
in the OpenAI Chat Completions format.406407 Defaults to `False`.408409 ```python410 ToolMessage(411 content=[412 {413 "type": "image_url",414 "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},415 },416 ],417 tool_call_id="1",418 name="random_image",419 )420 ```421422 ...as well as the LangChain `ImageContentBlock` format:423424 ```python425 ToolMessage(426 content=[427 {428 "type": "image",429 "base64": image_data,430 "mime_type": "image/jpeg",431 },432 ],433 tool_call_id="1",434 name="random_image",435 )436 ```437438 If set to `True`, the chat model will be tested with message sequences that439 include `ToolMessage` objects of this form.440441 ```python442 @property443 def supports_image_tool_message(self) -> bool:444 return True445 ```446447 ??? info "`supports_pdf_inputs`"448449 Boolean property indicating whether the chat model supports PDF inputs.450451 Defaults to `False`.452453 If set to `True`, the chat model will be tested by inputting a454 `FileContentBlock` with the shape:455456 ```python457 {458 "type": "file",459 "base64": "<base64 file data>",460 "mime_type": "application/pdf",461 }462 ```463464 See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).465466 ```python467 @property468 def supports_pdf_inputs(self) -> bool:469 return True470 ```471472 ??? info "`supports_pdf_tool_message`"473474 Boolean property indicating whether the chat model supports a `ToolMessage`475 that includes PDF content using the LangChain `FileContentBlock` format.476477 Defaults to `False`.478479 ```python480 ToolMessage(481 content=[482 {483 "type": "file",484 "base64": pdf_data,485 "mime_type": "application/pdf",486 },487 ],488 tool_call_id="1",489 name="random_pdf",490 )491 ```492493 If set to `True`, the chat model will be tested with message sequences that494 include `ToolMessage` objects of this form.495496 ```python497 @property498 def supports_pdf_tool_message(self) -> bool:499 return True500 ```501502 ??? 
info "`supports_audio_inputs`"503504 Boolean property indicating whether the chat model supports audio inputs.505506 Defaults to `False`.507508 If set to `True`, the chat model will be tested by inputting an509 `AudioContentBlock` with the shape:510511 ```python512 {513 "type": "audio",514 "base64": "<base64 audio data>",515 "mime_type": "audio/wav", # or appropriate MIME type516 }517 ```518519 See docs for [Multimodality](https://docs.langchain.com/oss/python/langchain/models#multimodal).520521 ```python522 @property523 def supports_audio_inputs(self) -> bool:524 return True525 ```526527 !!! warning528 This test downloads audio data from wikimedia.org. You may need to set the529 `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these tests,530 e.g.,531532 ```bash533 export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; coolbot@example.org) generic-library/0.0"534 ```535536 Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).537538 ??? info "`supports_video_inputs`"539540 Boolean property indicating whether the chat model supports video inputs.541542 Defaults to `False`.543544 No current tests are written for this feature.545546 ??? info "`returns_usage_metadata`"547548 Boolean property indicating whether the chat model returns usage metadata549 on invoke and streaming responses.550551 Defaults to `True`.552553 `usage_metadata` is an optional dict attribute on `AIMessage` objects that tracks554 input and output tokens.555556 [See more](https://reference.langchain.com/python/langchain_core/language_models/#langchain_core.messages.ai.UsageMetadata).557558 ```python559 @property560 def returns_usage_metadata(self) -> bool:561 return False562 ```563564 Models supporting `usage_metadata` should also return the name of the underlying565 model in the `response_metadata` of the `AIMessage`.566567 ??? 
info "`supports_anthropic_inputs`"568569 Boolean property indicating whether the chat model supports Anthropic-style570 inputs.571572 Defaults to `False`.573574 These inputs might feature "tool use" and "tool result" content blocks, e.g.,575576 ```python577 [578 {"type": "text", "text": "Hmm let me think about that"},579 {580 "type": "tool_use",581 "input": {"fav_color": "green"},582 "id": "foo",583 "name": "color_picker",584 },585 ]586 ```587588 If set to `True`, the chat model will be tested using content blocks of this589 form.590591 ```python592 @property593 def supports_anthropic_inputs(self) -> bool:594 return True595 ```596597 ??? info "`supported_usage_metadata_details`"598599 Property controlling what usage metadata details are emitted in both invoke600 and stream.601602 Defaults to `{"invoke": [], "stream": []}`.603604 `usage_metadata` is an optional dict attribute on `AIMessage` objects that track605 input and output tokens.606607 [See more](https://reference.langchain.com/python/langchain_core/language_models/#langchain_core.messages.ai.UsageMetadata).608609 It includes optional keys `input_token_details` and `output_token_details`610 that can track usage details associated with special types of tokens, such as611 cached, audio, or reasoning.612613 Only needs to be overridden if these details are supplied.614615 ??? info "`enable_vcr_tests`"616617 Property controlling whether to enable select tests that rely on618 [VCR](https://vcrpy.readthedocs.io/en/latest/) caching of HTTP calls, such619 as benchmarking tests.620621 Defaults to `False`.622623 To enable these tests, follow these steps:624625 1. Override the `enable_vcr_tests` property to return `True`:626627 ```python628 @property629 def enable_vcr_tests(self) -> bool:630 return True631 ```632633 2. Configure VCR to exclude sensitive headers and other information from634 cassettes.635636 !!! warning637 VCR will by default record authentication headers and other sensitive638 information in cassettes. 
Read below for how to configure what639 information is recorded in cassettes.640641 To add configuration to VCR, add a `conftest.py` file to the `tests/`642 directory and implement the `vcr_config` fixture there.643644 `langchain-tests` excludes the headers `'authorization'`,645 `'x-api-key'`, and `'api-key'` from VCR cassettes. To pick up this646 configuration, you will need to add `conftest.py` as shown below. You can647 also exclude additional headers, override the default exclusions, or apply648 other customizations to the VCR configuration. See example below:649650 ```python title="tests/conftest.py"651 import pytest652 from langchain_tests.conftest import base_vcr_config653654 _EXTRA_HEADERS = [655 # Specify additional headers to redact656 ("user-agent", "PLACEHOLDER"),657 ]658659660 def remove_response_headers(response: dict) -> dict:661 # If desired, remove or modify headers in the response.662 response["headers"] = {}663 return response664665666 @pytest.fixture(scope="session")667 def vcr_config() -> dict:668 """Extend the default configuration from langchain_tests."""669 config = base_vcr_config()670 config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)671 config["before_record_response"] = remove_response_headers672673 return config674 ```675676 ??? note "Compressing cassettes"677678 `langchain-tests` includes a custom VCR serializer that compresses679 cassettes using gzip. To use it, register the `yaml.gz` serializer680 to your VCR fixture and enable this serializer in the config. 
See681 example below:682683 ```python title="tests/conftest.py"684 import pytest685 from langchain_tests.conftest import (686 CustomPersister,687 CustomSerializer,688 )689 from langchain_tests.conftest import base_vcr_config690 from vcr import VCR691692 _EXTRA_HEADERS = [693 # Specify additional headers to redact694 ("user-agent", "PLACEHOLDER"),695 ]696697698 def remove_response_headers(response: dict) -> dict:699 # If desired, remove or modify headers in the response.700 response["headers"] = {}701 return response702703704 @pytest.fixture(scope="session")705 def vcr_config() -> dict:706 """Extend the default configuration from langchain_tests."""707 config = base_vcr_config()708 config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)709 config["before_record_response"] = remove_response_headers710 # New: enable serializer and set file extension711 config["serializer"] = "yaml.gz"712 config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")713714 return config715716717 def pytest_recording_configure(config: dict, vcr: VCR) -> None:718 vcr.register_persister(CustomPersister())719 vcr.register_serializer("yaml.gz", CustomSerializer())720 ```721722 You can inspect the contents of the compressed cassettes (e.g., to723 ensure no sensitive information is recorded) using724725 ```bash726 gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz727 ```728729 ...or by using the serializer:730731 ```python732 from langchain_tests.conftest import (733 CustomPersister,734 CustomSerializer,735 )736737 cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz"738 requests, responses = CustomPersister().load_cassette(739 cassette_path, CustomSerializer()740 )741 ```742743 3. Run tests to generate VCR cassettes.744745 ```bash title="Example"746 uv run python -m pytest tests/integration_tests/test_chat_models.py::TestMyModel::test_stream_time747 ```748749 This will generate a VCR cassette for the test in750 `tests/integration_tests/cassettes/`.751752 !!! 
warning753 You should inspect the generated cassette to ensure that it does not754 contain sensitive information. If it does, you can modify the755 `vcr_config` fixture to exclude headers or modify the response756 before it is recorded.757758 You can then commit the cassette to your repository. Subsequent test runs759 will use the cassette instead of making HTTP calls.760 ''' # noqa: E501761762 @property763 def standard_chat_model_params(self) -> dict[str, Any]:764 """Standard parameters for chat model."""765 return {}766767 def test_invoke(self, model: BaseChatModel) -> None:768 """Test to verify that `model.invoke(simple_message)` works.769770 This should pass for all integrations.771772 ??? question "Troubleshooting"773774 If this test fails, you should make sure your `_generate` method775 does not raise any exceptions, and that it returns a valid776 `langchain_core.outputs.chat_result.ChatResult` like so:777778 ```python779 return ChatResult(780 generations=[ChatGeneration(message=AIMessage(content="Output text"))]781 )782 ```783784 """785 result = model.invoke("Hello")786 assert result is not None787 assert isinstance(result, AIMessage)788 assert isinstance(result.text, str)789 assert len(result.content) > 0790791 async def test_ainvoke(self, model: BaseChatModel) -> None:792 """Test to verify that `await model.ainvoke(simple_message)` works.793794 This should pass for all integrations. Passing this test does not indicate795 a "natively async" implementation, but rather that the model can be used796 in an async context.797798 ??? 
question "Troubleshooting"799800 First, debug801 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.802 because `ainvoke` has a default implementation that calls `invoke` in an803 async context.804805 If that test passes but not this one, you should make sure your `_agenerate`806 method does not raise any exceptions, and that it returns a valid807 `langchain_core.outputs.chat_result.ChatResult` like so:808809 ```python810 return ChatResult(811 generations=[ChatGeneration(message=AIMessage(content="Output text"))]812 )813 ```814 """815 result = await model.ainvoke("Hello")816 assert result is not None817 assert isinstance(result, AIMessage)818 assert isinstance(result.text, str)819 assert len(result.content) > 0820821 @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)822 def test_stream(self, model: BaseChatModel) -> None:823 """Test to verify that `model.stream(simple_message)` works.824825 This should pass for all integrations. Passing this test does not indicate826 a "streaming" implementation, but rather that the model can be used in a827 streaming context.828829 ??? question "Troubleshooting"830831 First, debug832 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.833 because `stream` has a default implementation that calls `invoke` and834 yields the result as a single chunk.835836 If that test passes but not this one, you should make sure your `_stream`837 method does not raise any exceptions, and that it yields valid838 `langchain_core.outputs.chat_generation.ChatGenerationChunk`839 objects like so:840841 ```python842 yield ChatGenerationChunk(message=AIMessageChunk(content="chunk text"))843 ```844845 The final chunk must have `chunk_position='last'` to signal stream846 completion. 
This enables proper parsing of `tool_call_chunks` into847 `tool_calls` on the aggregated message:848849 ```python850 for i, token in enumerate(tokens):851 is_last = i == len(tokens) - 1852 yield ChatGenerationChunk(853 message=AIMessageChunk(854 content=token,855 chunk_position="last" if is_last else None,856 )857 )858 ```859 """860 chunks: list[AIMessageChunk] = []861 full: AIMessageChunk | None = None862 for chunk in model.stream("Hello"):863 assert chunk is not None864 assert isinstance(chunk, AIMessageChunk)865 assert isinstance(chunk.content, str | list)866 chunks.append(chunk)867 full = chunk if full is None else full + chunk868 assert len(chunks) > 0869 assert isinstance(full, AIMessageChunk)870 assert full.content871 assert full.text872 # Exactly one text block — guards against merge bugs that would produce873 # multiple adjacent text blocks in the aggregated result.874 text_blocks = [b for b in full.content_blocks if b["type"] == "text"]875 assert len(text_blocks) == 1876877 # Verify chunk_position signaling878 last_chunk = chunks[-1]879 assert last_chunk.chunk_position == "last", (880 f"Final chunk must have chunk_position='last', "881 f"got {last_chunk.chunk_position!r}"882 )883884 @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)885 async def test_astream(self, model: BaseChatModel) -> None:886 """Test to verify that `await model.astream(simple_message)` works.887888 This should pass for all integrations. Passing this test does not indicate889 a "natively async" or "streaming" implementation, but rather that the model can890 be used in an async streaming context.891892 ??? 
question "Troubleshooting"893894 First, debug895 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`.896 and897 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`.898 because `astream` has a default implementation that calls `_stream` in899 an async context if it is implemented, or `ainvoke` and yields the result900 as a single chunk if not.901902 If those tests pass but not this one, you should make sure your `_astream`903 method does not raise any exceptions, and that it yields valid904 `langchain_core.outputs.chat_generation.ChatGenerationChunk`905 objects like so:906907 ```python908 yield ChatGenerationChunk(message=AIMessageChunk(content="chunk text"))909 ```910911 See `test_stream` troubleshooting for `chunk_position` requirements.912 """913 chunks: list[AIMessageChunk] = []914 full: AIMessageChunk | None = None915 async for chunk in model.astream("Hello"):916 assert chunk is not None917 assert isinstance(chunk, AIMessageChunk)918 assert isinstance(chunk.content, str | list)919 chunks.append(chunk)920 full = chunk if full is None else full + chunk921 assert len(chunks) > 0922 assert isinstance(full, AIMessageChunk)923 assert full.content924 assert full.text925 # Exactly one text block — guards against merge bugs that would produce926 # multiple adjacent text blocks in the aggregated result.927 text_blocks = [b for b in full.content_blocks if b["type"] == "text"]928 assert len(text_blocks) == 1929930 # Verify chunk_position signaling931 last_chunk = chunks[-1]932 assert last_chunk.chunk_position == "last", (933 f"Final chunk must have chunk_position='last', "934 f"got {last_chunk.chunk_position!r}"935 )936937 def test_stream_events_v3(self, model: BaseChatModel) -> None:938 """Test that `model.stream_events("Hello", version="v3")` works.939940 Exercises the content-block-centric streaming protocol. 
Passing this941 test indicates the model participates in `stream_events(version="v3")` either942 natively (via `_stream_chat_model_events`) or through the compat bridge that943 converts `_stream` chunks into protocol events.944945 ??? question "Troubleshooting"946947 First, debug948 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`949 — `stream_events(version="v3")` falls back to the same950 `_stream` path via the compat bridge when the model does not951 implement952 `_stream_chat_model_events`. If `test_stream` passes but this does953 not, inspect the raised lifecycle violation: it identifies the954 event index and the rule broken.955 """956 stream = model.stream_events("Hello", version="v3")957 assert isinstance(stream, ChatModelStream)958959 events = list(stream)960 assert len(events) > 0961 assert_valid_event_stream(events)962963 message = stream.output964 assert isinstance(message, AIMessage)965 assert message.content966 assert message.text967 assert any(block["type"] == "text" for block in message.content_blocks)968 # `stream_events(version="v3")` always assembles content as v1 protocol blocks.969 assert message.response_metadata.get("output_version") == "v1"970971 async def test_astream_events_v3(self, model: BaseChatModel) -> None:972 """Test that `await model.astream_events("Hello", version="v3")` works.973974 Async counterpart to `test_stream_events_v3`. Exercises the975 `AsyncChatModelStream` path end-to-end: the background producer task,976 replay-buffer-backed event iteration, and the awaitable `output`977 projection.978979 ??? 
question "Troubleshooting"980981 First, debug982 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_astream`.983 If `test_astream` passes but this does not, inspect the raised984 lifecycle violation; it identifies the event index and the rule985 broken.986 """987 stream = await model.astream_events("Hello", version="v3")988 assert isinstance(stream, AsyncChatModelStream)989990 events = [event async for event in stream]991 assert len(events) > 0992 assert_valid_event_stream(events)993994 message = await stream.output995 assert isinstance(message, AIMessage)996 assert message.content997 assert message.text998 assert any(block["type"] == "text" for block in message.content_blocks)999 assert message.response_metadata.get("output_version") == "v1"10001001 def test_invoke_with_model_override(self, model: BaseChatModel) -> None:1002 """Test that model name can be overridden at invoke time via kwargs.10031004 This enables dynamic model selection without creating new instances,1005 which is useful for fallback strategies, A/B testing, or cost optimization.10061007 Test is skipped if `supports_model_override` is `False`.10081009 ??? 
question "Troubleshooting"10101011 If this test fails, ensure that your `_generate` method passes1012 `**kwargs` through to the API request payload in a way that allows1013 the `model` parameter to be overridden.10141015 For example:1016 ```python1017 def _get_request_payload(self, ..., **kwargs) -> dict:1018 return {1019 "model": self.model,1020 ...1021 **kwargs, # kwargs should come last to allow overrides1022 }1023 ```1024 """1025 if not self.supports_model_override:1026 pytest.skip("Model override not supported.")10271028 override_model = self.model_override_value1029 if not override_model:1030 pytest.skip("model_override_value not specified.")10311032 result = model.invoke("Hello", model=override_model)1033 assert result is not None1034 assert isinstance(result, AIMessage)10351036 # Verify the overridden model was used1037 model_name = result.response_metadata.get("model_name")1038 assert model_name is not None, "model_name not found in response_metadata"1039 assert override_model in model_name, (1040 f"Expected model '{override_model}' but got '{model_name}'"1041 )10421043 async def test_ainvoke_with_model_override(self, model: BaseChatModel) -> None:1044 """Test that model name can be overridden at ainvoke time via kwargs.10451046 Test is skipped if `supports_model_override` is `False`.10471048 ??? 
question "Troubleshooting"10491050 See troubleshooting for `test_invoke_with_model_override`.1051 """1052 if not self.supports_model_override:1053 pytest.skip("Model override not supported.")10541055 override_model = self.model_override_value1056 if not override_model:1057 pytest.skip("model_override_value not specified.")10581059 result = await model.ainvoke("Hello", model=override_model)1060 assert result is not None1061 assert isinstance(result, AIMessage)10621063 # Verify the overridden model was used1064 model_name = result.response_metadata.get("model_name")1065 assert model_name is not None, "model_name not found in response_metadata"1066 assert override_model in model_name, (1067 f"Expected model '{override_model}' but got '{model_name}'"1068 )10691070 def test_stream_with_model_override(self, model: BaseChatModel) -> None:1071 """Test that model name can be overridden at stream time via kwargs.10721073 Test is skipped if `supports_model_override` is `False`.10741075 ??? question "Troubleshooting"10761077 See troubleshooting for `test_invoke_with_model_override`.1078 """1079 if not self.supports_model_override:1080 pytest.skip("Model override not supported.")10811082 override_model = self.model_override_value1083 if not override_model:1084 pytest.skip("model_override_value not specified.")10851086 full: AIMessageChunk | None = None1087 for chunk in model.stream("Hello", model=override_model):1088 assert isinstance(chunk, AIMessageChunk)1089 full = chunk if full is None else full + chunk10901091 assert full is not None10921093 # Verify the overridden model was used1094 model_name = full.response_metadata.get("model_name")1095 assert model_name is not None, "model_name not found in response_metadata"1096 assert override_model in model_name, (1097 f"Expected model '{override_model}' but got '{model_name}'"1098 )10991100 async def test_astream_with_model_override(self, model: BaseChatModel) -> None:1101 """Test that model name can be overridden at astream time 
via kwargs.11021103 Test is skipped if `supports_model_override` is `False`.11041105 ??? question "Troubleshooting"11061107 See troubleshooting for `test_invoke_with_model_override`.1108 """1109 if not self.supports_model_override:1110 pytest.skip("Model override not supported.")11111112 override_model = self.model_override_value1113 if not override_model:1114 pytest.skip("model_override_value not specified.")11151116 full: AIMessageChunk | None = None1117 async for chunk in model.astream("Hello", model=override_model):1118 assert isinstance(chunk, AIMessageChunk)1119 full = chunk if full is None else full + chunk11201121 assert full is not None11221123 # Verify the overridden model was used1124 model_name = full.response_metadata.get("model_name")1125 assert model_name is not None, "model_name not found in response_metadata"1126 assert override_model in model_name, (1127 f"Expected model '{override_model}' but got '{model_name}'"1128 )11291130 def test_batch(self, model: BaseChatModel) -> None:1131 """Test to verify that `model.batch([messages])` works.11321133 This should pass for all integrations. Tests the model's ability to process1134 multiple prompts in a single batch.11351136 ??? 
question "Troubleshooting"11371138 First, debug1139 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1140 because `batch` has a default implementation that calls `invoke` for1141 each message in the batch.11421143 If that test passes but not this one, you should make sure your `batch`1144 method does not raise any exceptions, and that it returns a list of valid1145 `AIMessage` objects.11461147 """1148 batch_results = model.batch(["Hello", "Hey"])1149 assert batch_results is not None1150 assert isinstance(batch_results, list)1151 assert len(batch_results) == 21152 for result in batch_results:1153 assert result is not None1154 assert isinstance(result, AIMessage)1155 assert isinstance(result.text, str)1156 assert len(result.content) > 011571158 async def test_abatch(self, model: BaseChatModel) -> None:1159 """Test to verify that `await model.abatch([messages])` works.11601161 This should pass for all integrations. Tests the model's ability to process1162 multiple prompts in a single batch asynchronously.11631164 ??? 
question "Troubleshooting"11651166 First, debug1167 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_batch`1168 and1169 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`1170 because `abatch` has a default implementation that calls `ainvoke` for1171 each message in the batch.11721173 If those tests pass but not this one, you should make sure your `abatch`1174 method does not raise any exceptions, and that it returns a list of valid1175 `AIMessage` objects.11761177 """1178 batch_results = await model.abatch(["Hello", "Hey"])1179 assert batch_results is not None1180 assert isinstance(batch_results, list)1181 assert len(batch_results) == 21182 for result in batch_results:1183 assert result is not None1184 assert isinstance(result, AIMessage)1185 assert isinstance(result.text, str)1186 assert len(result.content) > 011871188 def test_conversation(self, model: BaseChatModel) -> None:1189 """Test to verify that the model can handle multi-turn conversations.11901191 This should pass for all integrations. Tests the model's ability to process1192 a sequence of alternating `HumanMessage` and `AIMessage` objects as context for1193 generating the next response.11941195 ??? question "Troubleshooting"11961197 First, debug1198 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1199 because this test also uses `model.invoke`.12001201 If that test passes but not this one, you should verify that:12021203 1. Your model correctly processes the message history1204 2. The model maintains appropriate context from previous messages1205 3. 
The response is a valid `langchain_core.messages.AIMessage`12061207 """1208 messages = [1209 HumanMessage("hello"),1210 AIMessage("hello"),1211 HumanMessage("how are you"),1212 ]12131214 result = model.invoke(messages)1215 assert result is not None1216 assert isinstance(result, AIMessage)1217 assert isinstance(result.text, str)1218 assert len(result.content) > 012191220 def test_double_messages_conversation(self, model: BaseChatModel) -> None:1221 """Test to verify that the model can handle double-message conversations.12221223 This should pass for all integrations. Tests the model's ability to process1224 a sequence of double-system, double-human, and double-ai messages as context1225 for generating the next response.12261227 ??? question "Troubleshooting"12281229 First, debug1230 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`1231 because this test also uses `model.invoke`.12321233 Second, debug1234 `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_conversation`1235 because this test is the "basic case" without double messages.12361237 If that test passes those but not this one, you should verify that:12381239 1. Your model API can handle double messages, or the integration should1240 merge messages before sending them to the API.1241 2. 
The response is a valid `langchain_core.messages.AIMessage`12421243 """1244 messages = [1245 SystemMessage("hello"),1246 SystemMessage("hello"),1247 HumanMessage("hello"),1248 HumanMessage("hello"),1249 AIMessage("hello"),1250 AIMessage("hello"),1251 HumanMessage("how are you"),1252 ]12531254 result = model.invoke(messages)1255 assert result is not None1256 assert isinstance(result, AIMessage)1257 assert isinstance(result.text, str)1258 assert len(result.content) > 012591260 def test_usage_metadata(self, model: BaseChatModel) -> None:1261 """Test to verify that the model returns correct usage metadata.12621263 This test is optional and should be skipped if the model does not return1264 usage metadata (see configuration below).12651266 !!! warning "Behavior changed in `langchain-tests` 0.3.17"12671268 Additionally check for the presence of `model_name` in the response1269 metadata, which is needed for usage tracking in callback handlers.12701271 ??? note "Configuration"12721273 By default, this test is run.12741275 To disable this feature, set `returns_usage_metadata` to `False` in your1276 test class:12771278 ```python1279 class TestMyChatModelIntegration(ChatModelIntegrationTests):1280 @property1281 def returns_usage_metadata(self) -> bool:1282 return False1283 ```12841285 This test can also check the format of specific kinds of usage metadata1286 based on the `supported_usage_metadata_details` property.12871288 This property should be configured as follows with the types of tokens that1289 the model supports tracking:12901291 ```python1292 class TestMyChatModelIntegration(ChatModelIntegrationTests):1293 @property1294 def supported_usage_metadata_details(self) -> dict:1295 return {1296 "invoke": [1297 "audio_input",1298 "audio_output",1299 "reasoning_output",1300 "cache_read_input",1301 "cache_creation_input",1302 ],1303 "stream": [1304 "audio_input",1305 "audio_output",1306 "reasoning_output",1307 "cache_read_input",1308 "cache_creation_input",1309 ],1310 }1311 
```13121313 ??? question "Troubleshooting"13141315 If this test fails, first verify that your model returns1316 `langchain_core.messages.ai.UsageMetadata` dicts1317 attached to the returned `AIMessage` object in `_generate`:13181319 ```python1320 return ChatResult(1321 generations=[1322 ChatGeneration(1323 message=AIMessage(1324 content="Output text",1325 usage_metadata={1326 "input_tokens": 350,1327 "output_tokens": 240,1328 "total_tokens": 590,1329 "input_token_details": {1330 "audio": 10,1331 "cache_creation": 200,1332 "cache_read": 100,1333 },1334 "output_token_details": {1335 "audio": 10,1336 "reasoning": 200,1337 },1338 },1339 )1340 )1341 ]1342 )1343 ```13441345 Check also that the response includes a `model_name` key in its1346 `usage_metadata`.1347 """1348 if not self.returns_usage_metadata:1349 pytest.skip("Not implemented.")13501351 result = model.invoke("Hello")1352 assert result is not None1353 assert isinstance(result, AIMessage)13541355 assert result.usage_metadata is not None1356 assert isinstance(result.usage_metadata["input_tokens"], int)1357 assert isinstance(result.usage_metadata["output_tokens"], int)1358 assert isinstance(result.usage_metadata["total_tokens"], int)13591360 # Check model_name is in response_metadata1361 # Needed for langchain_core.callbacks.usage1362 model_name = result.response_metadata.get("model_name")1363 assert isinstance(model_name, str)1364 assert model_name, "model_name is empty"13651366 # `input_tokens` is the total, possibly including other unclassified or1367 # system-level tokens.1368 if "audio_input" in self.supported_usage_metadata_details["invoke"]:1369 # Checks if the specific chat model integration being tested has declared1370 # that it supports reporting token counts specifically for `audio_input`1371 msg = self.invoke_with_audio_input() # To be implemented in test subclass1372 assert (usage_metadata := msg.usage_metadata) is not None1373 assert (1374 input_token_details := 
usage_metadata.get("input_token_details")1375 ) is not None1376 assert isinstance(input_token_details.get("audio"), int)1377 # Asserts that total input tokens are at least the sum of the token counts1378 assert usage_metadata.get("input_tokens", 0) >= sum(1379 v for v in input_token_details.values() if isinstance(v, int)1380 )1381 if "audio_output" in self.supported_usage_metadata_details["invoke"]:1382 msg = self.invoke_with_audio_output()1383 assert (usage_metadata := msg.usage_metadata) is not None1384 assert (1385 output_token_details := usage_metadata.get("output_token_details")1386 ) is not None1387 assert isinstance(output_token_details.get("audio"), int)1388 # Asserts that total output tokens are at least the sum of the token counts1389 assert usage_metadata.get("output_tokens", 0) >= sum(1390 v for v in output_token_details.values() if isinstance(v, int)1391 )1392 if "reasoning_output" in self.supported_usage_metadata_details["invoke"]:1393 msg = self.invoke_with_reasoning_output()1394 assert (usage_metadata := msg.usage_metadata) is not None1395 assert (1396 output_token_details := usage_metadata.get("output_token_details")1397 ) is not None1398 assert isinstance(output_token_details.get("reasoning"), int)1399 # Asserts that total output tokens are at least the sum of the token counts1400 assert usage_metadata.get("output_tokens", 0) >= sum(1401 v for v in output_token_details.values() if isinstance(v, int)1402 )1403 if "cache_read_input" in self.supported_usage_metadata_details["invoke"]:1404 msg = self.invoke_with_cache_read_input()1405 usage_metadata = msg.usage_metadata1406 assert usage_metadata is not None1407 input_token_details = usage_metadata.get("input_token_details")1408 assert input_token_details is not None1409 cache_read_tokens = input_token_details.get("cache_read")1410 assert isinstance(cache_read_tokens, int)1411 assert cache_read_tokens >= 01412 # Asserts that total input tokens are at least the sum of the token counts1413 
total_detailed_tokens = sum(1414 v for v in input_token_details.values() if isinstance(v, int) and v >= 01415 )1416 input_tokens = usage_metadata.get("input_tokens", 0)1417 assert isinstance(input_tokens, int)1418 assert input_tokens >= total_detailed_tokens1419 if "cache_creation_input" in self.supported_usage_metadata_details["invoke"]:1420 msg = self.invoke_with_cache_creation_input()1421 usage_metadata = msg.usage_metadata1422 assert usage_metadata is not None1423 input_token_details = usage_metadata.get("input_token_details")1424 assert input_token_details is not None1425 cache_creation_tokens = input_token_details.get("cache_creation")1426 assert isinstance(cache_creation_tokens, int)1427 assert cache_creation_tokens >= 01428 # Asserts that total input tokens are at least the sum of the token counts1429 total_detailed_tokens = sum(1430 v for v in input_token_details.values() if isinstance(v, int) and v >= 01431 )1432 input_tokens = usage_metadata.get("input_tokens", 0)1433 assert isinstance(input_tokens, int)1434 assert input_tokens >= total_detailed_tokens14351436 def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:1437 """Test usage metadata in streaming mode.14381439 Test to verify that the model returns correct usage metadata in streaming mode.14401441 !!! warning "Behavior changed in `langchain-tests` 0.3.17"14421443 Additionally check for the presence of `model_name` in the response1444 metadata, which is needed for usage tracking in callback handlers.14451446 ??? 
note "Configuration"14471448 By default, this test is run.1449 To disable this feature, set `returns_usage_metadata` to `False` in your1450 test class:14511452 ```python1453 class TestMyChatModelIntegration(ChatModelIntegrationTests):1454 @property1455 def returns_usage_metadata(self) -> bool:1456 return False1457 ```14581459 This test can also check the format of specific kinds of usage metadata1460 based on the `supported_usage_metadata_details` property.14611462 This property should be configured as follows with the types of tokens that1463 the model supports tracking:14641465 ```python1466 class TestMyChatModelIntegration(ChatModelIntegrationTests):1467 @property1468 def supported_usage_metadata_details(self) -> dict:1469 return {1470 "invoke": [1471 "audio_input",1472 "audio_output",1473 "reasoning_output",1474 "cache_read_input",1475 "cache_creation_input",1476 ],1477 "stream": [1478 "audio_input",1479 "audio_output",1480 "reasoning_output",1481 "cache_read_input",1482 "cache_creation_input",1483 ],1484 }1485 ```14861487 ??? question "Troubleshooting"14881489 If this test fails, first verify that your model yields1490 `langchain_core.messages.ai.UsageMetadata` dicts1491 attached to the returned `AIMessage` object in `_stream`1492 that sum up to the total usage metadata.14931494 Note that `input_tokens` should only be included on one of the chunks1495 (typically the first or the last chunk), and the rest should have `0` or1496 `None` to avoid counting input tokens multiple times.14971498 `output_tokens` typically count the number of tokens in each chunk, not1499 the sum. 
This test will pass as long as the sum of `output_tokens` across1500 all chunks is not `0`.15011502 ```python1503 yield ChatResult(1504 generations=[1505 ChatGeneration(1506 message=AIMessage(1507 content="Output text",1508 usage_metadata={1509 "input_tokens": (1510 num_input_tokens if is_first_chunk else 01511 ),1512 "output_tokens": 11,1513 "total_tokens": (1514 11 + num_input_tokens if is_first_chunk else 111515 ),1516 "input_token_details": {1517 "audio": 10,1518 "cache_creation": 200,1519 "cache_read": 100,1520 },1521 "output_token_details": {1522 "audio": 10,1523 "reasoning": 200,1524 },1525 },1526 )1527 )1528 ]1529 )1530 ```15311532 Check also that the aggregated response includes a `model_name` key1533 in its `usage_metadata`.15341535 """1536 if not self.returns_usage_metadata:1537 pytest.skip("Not implemented.")15381539 full: AIMessageChunk | None = None1540 for chunk in model.stream("Write me 2 haikus. Only include the haikus."):1541 assert isinstance(chunk, AIMessageChunk)1542 # only one chunk is allowed to set usage_metadata.input_tokens1543 # if multiple do, it's likely a bug that will result in overcounting1544 # input tokens (since the total number of input tokens applies to the full1545 # generation, not individual chunks)1546 if full and full.usage_metadata and full.usage_metadata["input_tokens"]:1547 assert (1548 not chunk.usage_metadata or not chunk.usage_metadata["input_tokens"]1549 ), (1550 "Only one chunk should set input_tokens,"1551 " the rest should be 0 or None"1552 )1553 # only one chunk is allowed to set usage_metadata.model_name1554 # if multiple do, they'll be concatenated incorrectly1555 if full and full.usage_metadata and full.usage_metadata.get("model_name"):1556 assert not chunk.usage_metadata or not chunk.usage_metadata.get(1557 "model_name"1558 ), "Only one chunk should set model_name, the rest should be None"1559 full = chunk if full is None else full + chunk15601561 assert isinstance(full, AIMessageChunk)1562 assert 
full.usage_metadata is not None1563 assert isinstance(full.usage_metadata["input_tokens"], int)1564 assert isinstance(full.usage_metadata["output_tokens"], int)1565 assert isinstance(full.usage_metadata["total_tokens"], int)15661567 # Check model_name is in response_metadata1568 # Needed for langchain_core.callbacks.usage1569 model_name = full.response_metadata.get("model_name")1570 assert isinstance(model_name, str)1571 assert model_name, "model_name is empty"15721573 if "audio_input" in self.supported_usage_metadata_details["stream"]:1574 msg = self.invoke_with_audio_input(stream=True)1575 assert msg.usage_metadata is not None1576 assert isinstance(1577 msg.usage_metadata.get("input_token_details", {}).get("audio"), int1578 )1579 if "audio_output" in self.supported_usage_metadata_details["stream"]:1580 msg = self.invoke_with_audio_output(stream=True)1581 assert msg.usage_metadata is not None1582 assert isinstance(1583 msg.usage_metadata.get("output_token_details", {}).get("audio"), int1584 )1585 if "reasoning_output" in self.supported_usage_metadata_details["stream"]:1586 msg = self.invoke_with_reasoning_output(stream=True)1587 assert msg.usage_metadata is not None1588 assert isinstance(1589 msg.usage_metadata.get("output_token_details", {}).get("reasoning"), int1590 )1591 if "cache_read_input" in self.supported_usage_metadata_details["stream"]:1592 msg = self.invoke_with_cache_read_input(stream=True)1593 assert msg.usage_metadata is not None1594 assert isinstance(1595 msg.usage_metadata.get("input_token_details", {}).get("cache_read"), int1596 )1597 if "cache_creation_input" in self.supported_usage_metadata_details["stream"]:1598 msg = self.invoke_with_cache_creation_input(stream=True)1599 assert msg.usage_metadata is not None1600 assert isinstance(1601 msg.usage_metadata.get("input_token_details", {}).get("cache_creation"),1602 int,1603 )16041605 def test_stop_sequence(self, model: BaseChatModel) -> None:1606 """Test that model does not fail when invoked with 
the `stop` parameter.16071608 The `stop` parameter is a standard parameter for stopping generation at a1609 certain token.16101611 [More on standard parameters](https://python.langchain.com/docs/concepts/chat_models/#standard-parameters).16121613 This should pass for all integrations.16141615 ??? question "Troubleshooting"16161617 If this test fails, check that the function signature for `_generate`1618 (as well as `_stream` and async variants) accepts the `stop` parameter:16191620 ```python1621 def _generate(1622 self,1623 messages: List[BaseMessage],1624 stop: list[str] | None = None,1625 run_manager: CallbackManagerForLLMRun | None = None,1626 **kwargs: Any,1627 ) -> ChatResult:16281629 ```1630 """1631 result = model.invoke("hi", stop=["you"])1632 assert isinstance(result, AIMessage)16331634 custom_model = self.chat_model_class(1635 **{1636 **self.chat_model_params,1637 "stop": ["you"],1638 }1639 )1640 result = custom_model.invoke("hi")1641 assert isinstance(result, AIMessage)16421643 @pytest.mark.parametrize("model", [{}, {"output_version": "v1"}], indirect=True)1644 def test_tool_calling(self, model: BaseChatModel) -> None:1645 """Test that the model generates tool calls.16461647 This test is skipped if the `has_tool_calling` property on the test class is1648 set to `False`.16491650 This test is optional and should be skipped if the model does not support1651 tool calling (see configuration below).16521653 ??? note "Configuration"16541655 To disable tool calling tests, set `has_tool_calling` to `False` in your1656 test class:16571658 ```python1659 class TestMyChatModelIntegration(ChatModelIntegrationTests):1660 @property1661 def has_tool_calling(self) -> bool:1662 return False1663 ```16641665 ??? 
question "Troubleshooting"16661667 If this test fails, check that `bind_tools` is implemented to correctly1668 translate LangChain tool objects into the appropriate schema for your1669 chat model.16701671 This test may fail if the chat model does not support a `tool_choice`1672 parameter. This parameter can be used to force a tool call. If1673 `tool_choice` is not supported and the model consistently fails this1674 test, you can `xfail` the test:16751676 ```python1677 @pytest.mark.xfail(reason=("Does not support tool_choice."))1678 def test_tool_calling(self, model: BaseChatModel) -> None:1679 super().test_tool_calling(model)1680 ```16811682 Otherwise, in the case that only one tool is bound, ensure that1683 `tool_choice` supports the string `'any'` to force calling that tool.16841685 If `tool_call_streaming = true` is set in the model's profile1686 augmentations, individual chunks are also validated to contain1687 `tool_call_chunk` blocks in `content_blocks`.16881689 """1690 if not self.has_tool_calling:1691 pytest.skip("Test requires tool calling.")16921693 tool_choice_value = None if not self.has_tool_choice else "any"1694 model_with_tools = model.bind_tools(1695 [magic_function], tool_choice=tool_choice_value1696 )16971698 # Test invoke1699 query = "What is the value of magic_function(3)? 
Use the tool."1700 result = model_with_tools.invoke(query)1701 _validate_tool_call_message(result)17021703 tool_call_streaming = (1704 model.profile.get("tool_call_streaming", False) if model.profile else False1705 )17061707 # Test stream1708 full: BaseMessage | None = None1709 found_tool_call_chunk = False1710 for chunk in model_with_tools.stream(query):1711 if tool_call_streaming and isinstance(chunk, AIMessageChunk):1712 found_tool_call_chunk |= _validate_tool_call_chunk(chunk)1713 full = chunk if full is None else full + chunk # type: ignore[assignment]1714 assert isinstance(full, AIMessage)1715 _validate_tool_call_message(full)17161717 if tool_call_streaming:1718 assert found_tool_call_chunk, (1719 "Expected to find 'tool_call_chunk' blocks in content_blocks of at "1720 "least one chunk during streaming, but none were found. If this "1721 "model does not support streaming tool calls, set "1722 "tool_call_streaming=false in the model's profile augmentations."1723 )17241725 async def test_tool_calling_async(self, model: BaseChatModel) -> None:1726 """Test that the model generates tool calls.17271728 This test is skipped if the `has_tool_calling` property on the test class is1729 set to `False`.17301731 This test is optional and should be skipped if the model does not support1732 tool calling (see configuration below).17331734 ??? note "Configuration"17351736 To disable tool calling tests, set `has_tool_calling` to `False` in your1737 test class:17381739 ```python1740 class TestMyChatModelIntegration(ChatModelIntegrationTests):1741 @property1742 def has_tool_calling(self) -> bool:1743 return False1744 ```17451746 ??? question "Troubleshooting"17471748 If this test fails, check that `bind_tools` is implemented to correctly1749 translate LangChain tool objects into the appropriate schema for your1750 chat model.17511752 This test may fail if the chat model does not support a `tool_choice`1753 parameter. This parameter can be used to force a tool call. 
If1754 `tool_choice` is not supported and the model consistently fails this1755 test, you can `xfail` the test:17561757 ```python1758 @pytest.mark.xfail(reason=("Does not support tool_choice."))1759 async def test_tool_calling_async(self, model: BaseChatModel) -> None:1760 await super().test_tool_calling_async(model)1761 ```17621763 Otherwise, in the case that only one tool is bound, ensure that1764 `tool_choice` supports the string `'any'` to force calling that tool.17651766 See `test_tool_calling` for `tool_call_streaming` profile configuration.17671768 """1769 if not self.has_tool_calling:1770 pytest.skip("Test requires tool calling.")17711772 tool_choice_value = None if not self.has_tool_choice else "any"1773 model_with_tools = model.bind_tools(1774 [magic_function], tool_choice=tool_choice_value1775 )17761777 # Test ainvoke1778 query = "What is the value of magic_function(3)? Use the tool."1779 result = await model_with_tools.ainvoke(query)1780 _validate_tool_call_message(result)17811782 tool_call_streaming = (1783 model.profile.get("tool_call_streaming", False) if model.profile else False1784 )17851786 # Test astream1787 full: BaseMessage | None = None1788 found_tool_call_chunk = False1789 async for chunk in model_with_tools.astream(query):1790 if tool_call_streaming and isinstance(chunk, AIMessageChunk):1791 found_tool_call_chunk |= _validate_tool_call_chunk(chunk)1792 full = chunk if full is None else full + chunk # type: ignore[assignment]1793 assert isinstance(full, AIMessage)1794 _validate_tool_call_message(full)17951796 if tool_call_streaming:1797 assert found_tool_call_chunk, (1798 "Expected to find 'tool_call_chunk' blocks in content_blocks of at "1799 "least one chunk during streaming, but none were found. 
If this "1800 "model does not support streaming tool calls, set "1801 "tool_call_streaming=false in the model's profile augmentations."1802 )18031804 def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:1805 """Test bind runnables as tools.18061807 Test that the model generates tool calls for tools that are derived from1808 LangChain runnables. This test is skipped if the `has_tool_calling` property1809 on the test class is set to `False`.18101811 This test is optional and should be skipped if the model does not support1812 tool calling (see configuration below).18131814 ??? note "Configuration"18151816 To disable tool calling tests, set `has_tool_calling` to `False` in your1817 test class:18181819 ```python1820 class TestMyChatModelIntegration(ChatModelIntegrationTests):1821 @property1822 def has_tool_calling(self) -> bool:1823 return False1824 ```18251826 ??? question "Troubleshooting"18271828 If this test fails, check that `bind_tools` is implemented to correctly1829 translate LangChain tool objects into the appropriate schema for your1830 chat model.18311832 This test may fail if the chat model does not support a `tool_choice`1833 parameter. This parameter can be used to force a tool call. If1834 `tool_choice` is not supported, set `has_tool_choice` to `False` in1835 your test class:18361837 ```python1838 @property1839 def has_tool_choice(self) -> bool:1840 return False1841 ```18421843 """1844 if not self.has_tool_calling:1845 pytest.skip("Test requires tool calling.")18461847 prompt = ChatPromptTemplate.from_messages(1848 [("human", "Hello. 
Please respond in the style of {answer_style}.")]1849 )1850 llm = GenericFakeChatModel(messages=iter(["hello matey"]))1851 chain = prompt | llm | StrOutputParser()1852 tool_ = chain.as_tool(1853 name="greeting_generator",1854 description="Generate a greeting in a particular style of speaking.",1855 )1856 if self.has_tool_choice:1857 tool_choice: str | None = "any"1858 else:1859 tool_choice = None1860 model_with_tools = model.bind_tools([tool_], tool_choice=tool_choice)1861 query = "Using the tool, generate a Pirate greeting."1862 result = model_with_tools.invoke(query)1863 assert isinstance(result, AIMessage)1864 assert result.tool_calls1865 tool_call = result.tool_calls[0]1866 assert tool_call["args"].get("answer_style")1867 assert tool_call.get("type") == "tool_call"18681869 def test_tool_message_histories_string_content(1870 self, model: BaseChatModel, my_adder_tool: BaseTool1871 ) -> None:1872 """Test that message histories are compatible with string tool contents.18731874 For instance with OpenAI format contents.1875 If a model passes this test, it should be compatible1876 with messages generated from providers following OpenAI format.18771878 This test should be skipped if the model does not support tool calling1879 (see configuration below).18801881 ??? note "Configuration"18821883 To disable tool calling tests, set `has_tool_calling` to `False` in your1884 test class:18851886 ```python1887 class TestMyChatModelIntegration(ChatModelIntegrationTests):1888 @property1889 def has_tool_calling(self) -> bool:1890 return False1891 ```18921893 ??? question "Troubleshooting"18941895 If this test fails, check that:18961897 1. The model can correctly handle message histories that include1898 `AIMessage` objects with `""` content.1899 2. The `tool_calls` attribute on `AIMessage` objects is correctly1900 handled and passed to the model in an appropriate format.1901 3. 
The model can correctly handle `ToolMessage` objects with string1902 content and arbitrary string values for `tool_call_id`.19031904 You can `xfail` the test if tool calling is implemented but this format1905 is not supported.19061907 ```python1908 @pytest.mark.xfail(reason=("Not implemented."))1909 def test_tool_message_histories_string_content(self, *args: Any) -> None:1910 super().test_tool_message_histories_string_content(*args)1911 ```1912 """1913 if not self.has_tool_calling:1914 pytest.skip("Test requires tool calling.")19151916 model_with_tools = model.bind_tools([my_adder_tool])1917 function_name = "my_adder_tool"1918 function_args = {"a": 1, "b": 2}19191920 messages_string_content = [1921 HumanMessage("What is 1 + 2"),1922 # string content (e.g. OpenAI)1923 AIMessage(1924 "",1925 tool_calls=[1926 {1927 "name": function_name,1928 "args": function_args,1929 "id": "abc123",1930 "type": "tool_call",1931 },1932 ],1933 ),1934 ToolMessage(1935 json.dumps({"result": 3}),1936 name=function_name,1937 tool_call_id="abc123",1938 ),1939 ]1940 result_string_content = model_with_tools.invoke(messages_string_content)1941 assert isinstance(result_string_content, AIMessage)19421943 def test_tool_message_histories_list_content(1944 self,1945 model: BaseChatModel,1946 my_adder_tool: BaseTool,1947 ) -> None:1948 """Test that message histories are compatible with list tool contents.19491950 For instance with Anthropic format contents.19511952 These message histories will include `AIMessage` objects with "tool use" and1953 content blocks, e.g.,19541955 ```python1956 [1957 {"type": "text", "text": "Hmm let me think about that"},1958 {1959 "type": "tool_use",1960 "input": {"fav_color": "green"},1961 "id": "foo",1962 "name": "color_picker",1963 },1964 ]1965 ```19661967 This test should be skipped if the model does not support tool calling1968 (see configuration below).19691970 ??? 
note "Configuration"19711972 To disable tool calling tests, set `has_tool_calling` to `False` in your1973 test class:19741975 ```python1976 class TestMyChatModelIntegration(ChatModelIntegrationTests):1977 @property1978 def has_tool_calling(self) -> bool:1979 return False1980 ```19811982 ??? question "Troubleshooting"19831984 If this test fails, check that:19851986 1. The model can correctly handle message histories that include1987 `AIMessage` objects with list content.1988 2. The `tool_calls` attribute on `AIMessage` objects is correctly1989 handled and passed to the model in an appropriate format.1990 3. The model can correctly handle ToolMessage objects with string content1991 and arbitrary string values for `tool_call_id`.19921993 You can `xfail` the test if tool calling is implemented but this format1994 is not supported.19951996 ```python1997 @pytest.mark.xfail(reason=("Not implemented."))1998 def test_tool_message_histories_list_content(self, *args: Any) -> None:1999 super().test_tool_message_histories_list_content(*args)2000 ```
Findings
✓ No findings reported for this file.