libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py PYTHON 1,894 lines View on github.com → Search inside
1"""Test Responses API usage."""23import base644import json5import os6from typing import TYPE_CHECKING, Annotated, Any, Literal, cast78import openai9import pytest10from langchain.agents import create_agent11from langchain.agents.middleware.types import (12    AgentMiddleware,13    AgentState,14    ToolCallRequest,15    hook_config,16)17from langchain_core.messages import (18    AIMessage,19    AIMessageChunk,20    BaseMessage,21    BaseMessageChunk,22    HumanMessage,23    MessageLikeRepresentation,24    ToolMessage,25)26from langchain_core.tools import tool27from langchain_core.utils.function_calling import convert_to_openai_tool28from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream29from pydantic import BaseModel30from typing_extensions import TypedDict3132from langchain_openai import ChatOpenAI, custom_tool33from langchain_openai.chat_models.base import _convert_to_openai_response_format3435if TYPE_CHECKING:36    from collections.abc import Awaitable3738    from langchain_core.language_models.chat_model_stream import (39        AsyncChatModelStream,40        ChatModelStream,41    )4243MODEL_NAME = "gpt-4o-mini"444546def _check_response(response: BaseMessage | None) -> None:47    assert isinstance(response, AIMessage)48    assert isinstance(response.content, list)49    for block in response.content:50        assert isinstance(block, dict)51        if block["type"] == "text":52            assert isinstance(block.get("text"), str)53            annotations = block.get("annotations", [])54            for annotation in annotations:55                if annotation["type"] == "file_citation":56                    assert all(57                        key in annotation58                        for key in ["file_id", "filename", "file_index", "type"]59                    )60                elif annotation["type"] == "web_search":61                    assert all(62                        key in annotation63                        for key in ["end_index", "start_index", "title", "type", "url"]64                    )65                elif annotation["type"] == "citation":66                    assert all(key in annotation for key in ["title", "type"])67                    if "url" in annotation:68                        assert "start_index" in annotation69                        assert "end_index" in annotation70    text_content = response.text  # type: ignore[operator,misc]71    assert isinstance(text_content, str)72    assert text_content73    assert response.usage_metadata74    assert response.usage_metadata["input_tokens"] > 075    assert response.usage_metadata["output_tokens"] > 076    assert response.usage_metadata["total_tokens"] > 077    assert response.response_metadata["model_name"]78    assert response.response_metadata["service_tier"]  # type: ignore[typeddict-item]798081@pytest.mark.vcr82def test_incomplete_response() -> None:83    model = ChatOpenAI(84        model=MODEL_NAME, use_responses_api=True, max_completion_tokens=1685    )86    response = model.invoke("Tell me a 100 word story about a bear.")87    assert response.response_metadata["incomplete_details"]88    assert response.response_metadata["incomplete_details"]["reason"]89    assert response.response_metadata["status"] == "incomplete"9091    full: AIMessageChunk | None = None92    for chunk in model.stream("Tell me a 100 word story about a bear."):93        assert isinstance(chunk, AIMessageChunk)94        full = chunk if full is None else full + chunk95    assert isinstance(full, AIMessageChunk)96    assert full.response_metadata["incomplete_details"]97    assert full.response_metadata["incomplete_details"]["reason"]98    assert full.response_metadata["status"] == "incomplete"99100101@pytest.mark.default_cassette("test_web_search.yaml.gz")102@pytest.mark.vcr103@pytest.mark.parametrize(104    ("output_version", "use_v2_stream"),105    [106        ("responses/v1", False),107        ("v1", False),108        ("v1", True),109    ],110)111def test_web_search(112    output_version: Literal["responses/v1", "v1"], use_v2_stream: bool113) -> None:114    llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)115    first_response = llm.invoke(116        "What was a positive news story from today?",117        tools=[{"type": "web_search_preview"}],118    )119    _check_response(first_response)120121    # Test streaming122    full: BaseMessage123    if use_v2_stream:124        full = llm.stream_events(125            "What was a positive news story from today?",126            tools=[{"type": "web_search_preview"}],127            version="v3",128        ).output129    else:130        aggregated: BaseMessageChunk | None = None131        for chunk in llm.stream(132            "What was a positive news story from today?",133            tools=[{"type": "web_search_preview"}],134        ):135            assert isinstance(chunk, AIMessageChunk)136            aggregated = chunk if aggregated is None else aggregated + chunk137        assert aggregated is not None138        full = aggregated139    _check_response(full)140141    # Use OpenAI's stateful API142    response = llm.invoke(143        "what about a negative one",144        tools=[{"type": "web_search_preview"}],145        previous_response_id=first_response.response_metadata["id"],146    )147    _check_response(response)148149    # Manually pass in chat history150    response = llm.invoke(151        [152            {"role": "user", "content": "What was a positive news story from today?"},153            first_response,154            {"role": "user", "content": "what about a negative one"},155        ],156        tools=[{"type": "web_search_preview"}],157    )158    _check_response(response)159160    # Bind tool161    response = llm.bind_tools([{"type": "web_search_preview"}]).invoke(162        "What was a positive news story from today?"163    )164    _check_response(response)165166    for msg in [first_response, full, response]:167        assert msg is not None168        block_types = [block["type"] for block in msg.content]  # type: ignore[index]169        if output_version == "responses/v1":170            assert block_types == ["web_search_call", "text"]171        else:172            assert block_types == ["server_tool_call", "server_tool_result", "text"]173174175@pytest.mark.flaky(retries=3, delay=1)176async def test_web_search_async() -> None:177    llm = ChatOpenAI(model=MODEL_NAME, output_version="v0")178    response = await llm.ainvoke(179        "What was a positive news story from today?",180        tools=[{"type": "web_search_preview"}],181    )182    _check_response(response)183    assert response.response_metadata["status"]184185    # Test streaming186    full: BaseMessageChunk | None = None187    async for chunk in llm.astream(188        "What was a positive news story from today?",189        tools=[{"type": "web_search_preview"}],190    ):191        assert isinstance(chunk, AIMessageChunk)192        full = chunk if full is None else full + chunk193    assert isinstance(full, AIMessageChunk)194    _check_response(full)195196    for msg in [response, full]:197        assert msg.additional_kwargs["tool_outputs"]198        assert len(msg.additional_kwargs["tool_outputs"]) == 1199        tool_output = msg.additional_kwargs["tool_outputs"][0]200        assert tool_output["type"] == "web_search_call"201202203@pytest.mark.default_cassette("test_apply_patch.yaml.gz")204@pytest.mark.vcr205def test_apply_patch() -> None:206    """Test the apply_patch built-in tool end-to-end.207208    apply_patch is a client-executed tool: the model proposes a file operation209    via an `apply_patch_call` block, the client applies it, and the result is210    returned as an `apply_patch_call_output` block. Requires a model that211    supports the tool.212    """213    prompt = "Create a new file named hello.txt containing the line: hello world"214    llm = ChatOpenAI(model="gpt-5.1", output_version="responses/v1")215    tool = {"type": "apply_patch"}216217    # Non-streaming: the model should emit an apply_patch_call block.218    response = llm.invoke(prompt, tools=[tool])219    assert isinstance(response, AIMessage)220    calls = [221        block222        for block in response.content223        if isinstance(block, dict) and block["type"] == "apply_patch_call"224    ]225    assert len(calls) == 1226    call = calls[0]227    assert call["call_id"]228    assert call["operation"]["type"] in ("create_file", "update_file", "delete_file")229230    # Streaming: the apply_patch_call block survives chunk aggregation.231    aggregated: BaseMessageChunk | None = None232    for chunk in llm.stream(prompt, tools=[tool]):233        assert isinstance(chunk, AIMessageChunk)234        aggregated = chunk if aggregated is None else aggregated + chunk235    assert isinstance(aggregated, AIMessageChunk)236    assert any(237        isinstance(block, dict) and block["type"] == "apply_patch_call"238        for block in aggregated.content239    )240241    # Round-trip: return an apply_patch_call_output and continue the conversation.242    output_message = HumanMessage(243        content=[244            {245                "type": "apply_patch_call_output",246                "call_id": call["call_id"],247                "status": "completed",248                "output": f"Created {call['operation']['path']}",249            }250        ]251    )252    follow_up = llm.invoke(253        [HumanMessage(prompt), response, output_message],254        tools=[tool],255    )256    assert isinstance(follow_up, AIMessage)257258259@pytest.mark.default_cassette("test_function_calling.yaml.gz")260@pytest.mark.vcr261@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])262def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -> None:263    def multiply(x: int, y: int) -> int:264        """return x * y"""265        return x * y266267    llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)268    bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])269    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))270    assert len(ai_msg.tool_calls) == 1271    assert ai_msg.tool_calls[0]["name"] == "multiply"272    assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}273274    full: Any = None275    for chunk in bound_llm.stream("whats 5 * 4"):276        assert isinstance(chunk, AIMessageChunk)277        full = chunk if full is None else full + chunk278    assert len(full.tool_calls) == 1279    assert full.tool_calls[0]["name"] == "multiply"280    assert set(full.tool_calls[0]["args"]) == {"x", "y"}281282    for msg in [ai_msg, full]:283        assert len(msg.content_blocks) == 1284        assert msg.content_blocks[0]["type"] == "tool_call"285286    response = bound_llm.invoke("What was a positive news story from today?")287    _check_response(response)288289290@pytest.mark.default_cassette("test_agent_loop.yaml.gz")291@pytest.mark.vcr292@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])293def test_agent_loop(output_version: Literal["responses/v1", "v1"]) -> None:294    @tool295    def get_weather(location: str) -> str:296        """Get the weather for a location."""297        return "It's sunny."298299    llm = ChatOpenAI(300        model="gpt-5.4",301        use_responses_api=True,302        output_version=output_version,303    )304    llm_with_tools = llm.bind_tools([get_weather])305    input_message = HumanMessage("What is the weather in San Francisco, CA?")306    tool_call_message = llm_with_tools.invoke([input_message])307    assert isinstance(tool_call_message, AIMessage)308    tool_calls = tool_call_message.tool_calls309    assert len(tool_calls) == 1310    tool_call = tool_calls[0]311    tool_message = get_weather.invoke(tool_call)312    assert isinstance(tool_message, ToolMessage)313    response = llm_with_tools.invoke(314        [315            input_message,316            tool_call_message,317            tool_message,318        ]319    )320    assert isinstance(response, AIMessage)321322323@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")324@pytest.mark.vcr325@pytest.mark.parametrize(326    ("output_version", "use_v2_stream"),327    [328        ("responses/v1", False),329        ("responses/v1", True),330        ("v1", False),331        ("v1", True),332    ],333)334def test_agent_loop_streaming(335    output_version: Literal["responses/v1", "v1"], use_v2_stream: bool336) -> None:337    @tool338    def get_weather(location: str) -> str:339        """Get the weather for a location."""340        return "It's sunny."341342    llm = ChatOpenAI(343        model="gpt-5.2",344        use_responses_api=True,345        reasoning={"effort": "medium", "summary": "auto"},346        streaming=True,347        output_version=output_version,348    )349    llm_with_tools = llm.bind_tools([get_weather])350    input_message = HumanMessage("What is the weather in San Francisco, CA?")351    if use_v2_stream:352        tool_call_message = cast(353            "ChatModelStream",354            llm_with_tools.stream_events([input_message], version="v3"),355        ).output356    else:357        tool_call_message = llm_with_tools.invoke([input_message])358    assert isinstance(tool_call_message, AIMessage)359    tool_calls = tool_call_message.tool_calls360    assert len(tool_calls) == 1361    tool_call = tool_calls[0]362    tool_message = get_weather.invoke(tool_call)363    assert isinstance(tool_message, ToolMessage)364    if use_v2_stream:365        response = cast(366            "ChatModelStream",367            llm_with_tools.stream_events(368                [input_message, tool_call_message, tool_message],369                version="v3",370            ),371        ).output372    else:373        response = llm_with_tools.invoke(374            [375                input_message,376                tool_call_message,377                tool_message,378            ]379        )380    assert isinstance(response, AIMessage)381382383@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")384@pytest.mark.vcr385async def test_agent_loop_streaming_astream_events_v3_v1() -> None:386    """Async multi-turn through `astream_events(version="v3")`.387388    Mirrors `test_agent_loop_streaming` for `output_version="v1"` but389    exercises `AsyncChatModelStream` end-to-end: aggregation in the390    async state machine, async projections, and the background391    producer task. Cassette byte-matches guarantee the aggregated392    message serializes identically to the legacy path on the393    follow-up turn.394    """395396    @tool397    def get_weather(location: str) -> str:398        """Get the weather for a location."""399        return "It's sunny."400401    llm = ChatOpenAI(402        model="gpt-5.2",403        use_responses_api=True,404        reasoning={"effort": "medium", "summary": "auto"},405        streaming=True,406        output_version="v1",407    )408    llm_with_tools = llm.bind_tools([get_weather])409    input_message = HumanMessage("What is the weather in San Francisco, CA?")410    stream = await cast(411        "Awaitable[AsyncChatModelStream]",412        llm_with_tools.astream_events([input_message], version="v3"),413    )414    tool_call_message = await stream415    assert isinstance(tool_call_message, AIMessage)416    tool_calls = tool_call_message.tool_calls417    assert len(tool_calls) == 1418    tool_call = tool_calls[0]419    tool_message = get_weather.invoke(tool_call)420    assert isinstance(tool_message, ToolMessage)421    stream = await cast(422        "Awaitable[AsyncChatModelStream]",423        llm_with_tools.astream_events(424            [input_message, tool_call_message, tool_message],425            version="v3",426        ),427    )428    response = await stream429    assert isinstance(response, AIMessage)430431432class Foo(BaseModel):433    response: str434435436class FooDict(TypedDict):437    response: str438439440@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz")441@pytest.mark.vcr442@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])443def test_parsed_pydantic_schema(444    output_version: Literal["v0", "responses/v1", "v1"],445) -> None:446    llm = ChatOpenAI(447        model=MODEL_NAME, use_responses_api=True, output_version=output_version448    )449    response = llm.invoke("how are ya", response_format=Foo)450    parsed = Foo(**json.loads(response.text))451    assert parsed == response.additional_kwargs["parsed"]452    assert parsed.response453454    # Test stream455    full: BaseMessageChunk | None = None456    for chunk in llm.stream("how are ya", response_format=Foo):457        assert isinstance(chunk, AIMessageChunk)458        full = chunk if full is None else full + chunk459    assert isinstance(full, AIMessageChunk)460    parsed = Foo(**json.loads(full.text))461    assert parsed == full.additional_kwargs["parsed"]462    assert parsed.response463464465async def test_parsed_pydantic_schema_async() -> None:466    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)467    response = await llm.ainvoke("how are ya", response_format=Foo)468    parsed = Foo(**json.loads(response.text))469    assert parsed == response.additional_kwargs["parsed"]470    assert parsed.response471472    # Test stream473    full: BaseMessageChunk | None = None474    async for chunk in llm.astream("how are ya", response_format=Foo):475        assert isinstance(chunk, AIMessageChunk)476        full = chunk if full is None else full + chunk477    assert isinstance(full, AIMessageChunk)478    parsed = Foo(**json.loads(full.text))479    assert parsed == full.additional_kwargs["parsed"]480    assert parsed.response481482483@pytest.mark.flaky(retries=3, delay=1)484@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])485def test_parsed_dict_schema(schema: Any) -> None:486    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)487    response = llm.invoke("how are ya", response_format=schema)488    parsed = json.loads(response.text)489    assert parsed == response.additional_kwargs["parsed"]490    assert parsed["response"]491    assert isinstance(parsed["response"], str)492493    # Test stream494    full: BaseMessageChunk | None = None495    for chunk in llm.stream("how are ya", response_format=schema):496        assert isinstance(chunk, AIMessageChunk)497        full = chunk if full is None else full + chunk498    assert isinstance(full, AIMessageChunk)499    parsed = json.loads(full.text)500    assert parsed == full.additional_kwargs["parsed"]501    assert parsed["response"]502    assert isinstance(parsed["response"], str)503504505def test_parsed_strict() -> None:506    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)507508    class Joke(TypedDict):509        setup: Annotated[str, ..., "The setup of the joke"]510        punchline: Annotated[str, None, "The punchline of the joke"]511512    schema = _convert_to_openai_response_format(Joke)513    invalid_schema = cast(dict, _convert_to_openai_response_format(Joke, strict=True))514    invalid_schema["json_schema"]["schema"]["required"] = ["setup"]  # make invalid515516    # Test not strict517    response = llm.invoke("Tell me a joke", response_format=schema)518    parsed = json.loads(response.text)519    assert parsed == response.additional_kwargs["parsed"]520521    # Test strict522    with pytest.raises(openai.BadRequestError):523        llm.invoke(524            "Tell me a joke about cats.", response_format=invalid_schema, strict=True525        )526    with pytest.raises(openai.BadRequestError):527        next(528            llm.stream(529                "Tell me a joke about cats.",530                response_format=invalid_schema,531                strict=True,532            )533        )534535536@pytest.mark.flaky(retries=3, delay=1)537@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])538async def test_parsed_dict_schema_async(schema: Any) -> None:539    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)540    response = await llm.ainvoke("how are ya", response_format=schema)541    parsed = json.loads(response.text)542    assert parsed == response.additional_kwargs["parsed"]543    assert parsed["response"]544    assert isinstance(parsed["response"], str)545546    # Test stream547    full: BaseMessageChunk | None = None548    async for chunk in llm.astream("how are ya", response_format=schema):549        assert isinstance(chunk, AIMessageChunk)550        full = chunk if full is None else full + chunk551    assert isinstance(full, AIMessageChunk)552    parsed = json.loads(full.text)553    assert parsed == full.additional_kwargs["parsed"]554    assert parsed["response"]555    assert isinstance(parsed["response"], str)556557558@pytest.mark.parametrize("schema", [Foo, Foo.model_json_schema(), FooDict])559def test_function_calling_and_structured_output(schema: Any) -> None:560    def multiply(x: int, y: int) -> int:561        """return x * y"""562        return x * y563564    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)565    bound_llm = llm.bind_tools([multiply], response_format=schema, strict=True)566    # Test structured output567    response = llm.invoke("how are ya", response_format=schema)568    if schema == Foo:569        parsed = schema(**json.loads(response.text))570        assert parsed.response571    else:572        parsed = json.loads(response.text)573        assert parsed["response"]574    assert parsed == response.additional_kwargs["parsed"]575576    # Test function calling577    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))578    assert len(ai_msg.tool_calls) == 1579    assert ai_msg.tool_calls[0]["name"] == "multiply"580    assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}581582583@pytest.mark.default_cassette("test_reasoning.yaml.gz")584@pytest.mark.vcr585@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])586def test_reasoning(output_version: Literal["v0", "responses/v1", "v1"]) -> None:587    llm = ChatOpenAI(588        model="gpt-5-nano", use_responses_api=True, output_version=output_version589    )590    response = llm.invoke("Hello", reasoning={"effort": "low"})591    assert isinstance(response, AIMessage)592593    # Test init params + streaming594    llm = ChatOpenAI(595        model="gpt-5-nano", reasoning={"effort": "low"}, output_version=output_version596    )597    full: BaseMessageChunk | None = None598    for chunk in llm.stream("Hello"):599        assert isinstance(chunk, AIMessageChunk)600        full = chunk if full is None else full + chunk601    assert isinstance(full, AIMessage)602603    for msg in [response, full]:604        if output_version == "v0":605            assert msg.additional_kwargs["reasoning"]606        else:607            block_types = [block["type"] for block in msg.content]608            assert block_types == ["reasoning", "text"]609610611def test_stateful_api() -> None:612    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)613    response = llm.invoke("how are you, my name is Bobo")614    assert "id" in response.response_metadata615616    second_response = llm.invoke(617        "what's my name", previous_response_id=response.response_metadata["id"]618    )619    assert isinstance(second_response.content, list)620    assert "bobo" in second_response.content[0]["text"].lower()  # type: ignore621622623def test_route_from_model_kwargs() -> None:624    llm = ChatOpenAI(625        model=MODEL_NAME, model_kwargs={"text": {"format": {"type": "text"}}}626    )627    _ = next(llm.stream("Hello"))628629630@pytest.mark.flaky(retries=3, delay=1)631def test_computer_calls() -> None:632    llm = ChatOpenAI(model="gpt-5.4")633    tool = {"type": "computer"}634    llm_with_tools = llm.bind_tools([tool], tool_choice="any")635    response = llm_with_tools.invoke("Please open the browser.")636    assert any(block["type"] == "computer_call" for block in response.content)  # type: ignore[index]637638639@pytest.mark.default_cassette("test_file_search.yaml.gz")640@pytest.mark.vcr641@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])642def test_file_search(643    output_version: Literal["responses/v1", "v1"],644) -> None:645    vector_store_id = os.getenv("OPENAI_VECTOR_STORE_ID")646    if not vector_store_id:647        pytest.skip()648649    llm = ChatOpenAI(650        model=MODEL_NAME,651        use_responses_api=True,652        output_version=output_version,653    )654    tool = {655        "type": "file_search",656        "vector_store_ids": [vector_store_id],657    }658659    input_message = {"role": "user", "content": "What is deep research by OpenAI?"}660    response = llm.invoke([input_message], tools=[tool])661    _check_response(response)662663    if output_version == "v1":664        assert [block["type"] for block in response.content] == [  # type: ignore[index]665            "server_tool_call",666            "server_tool_result",667            "text",668        ]669    else:670        assert [block["type"] for block in response.content] == [  # type: ignore[index]671            "file_search_call",672            "text",673        ]674675    full: AIMessageChunk | None = None676    for chunk in llm.stream([input_message], tools=[tool]):677        assert isinstance(chunk, AIMessageChunk)678        full = chunk if full is None else full + chunk679    assert isinstance(full, AIMessageChunk)680    _check_response(full)681682    if output_version == "v1":683        assert [block["type"] for block in full.content] == [  # type: ignore[index]684            "server_tool_call",685            "server_tool_result",686            "text",687        ]688    else:689        assert [block["type"] for block in full.content] == ["file_search_call", "text"]  # type: ignore[index]690691    next_message = {"role": "user", "content": "Thank you."}692    _ = llm.invoke([input_message, full, next_message])693694    for message in [response, full]:695        assert [block["type"] for block in message.content_blocks] == [696            "server_tool_call",697            "server_tool_result",698            "text",699        ]700701702@pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz")703@pytest.mark.vcr704@pytest.mark.parametrize(705    ("output_version", "use_v2_stream"),706    [707        ("v0", False),708        ("responses/v1", False),709        ("v1", False),710        ("v1", True),711    ],712)713def test_stream_reasoning_summary(714    output_version: Literal["v0", "responses/v1", "v1"],715    use_v2_stream: bool,716) -> None:717    llm = ChatOpenAI(718        model="gpt-5-nano",719        # Routes to Responses API if `reasoning` is set.720        reasoning={"effort": "medium", "summary": "auto"},721        output_version=output_version,722    )723    message_1 = {724        "role": "user",725        "content": "What was the third tallest buliding in the year 2000?",726    }727    response_1: BaseMessage728    if use_v2_stream:729        response_1 = llm.stream_events([message_1], version="v3").output730    else:731        aggregated: BaseMessageChunk | None = None732        for chunk in llm.stream([message_1]):733            assert isinstance(chunk, AIMessageChunk)734            aggregated = chunk if aggregated is None else aggregated + chunk735        assert isinstance(aggregated, AIMessageChunk)736        response_1 = aggregated737    if output_version == "v0":738        reasoning = response_1.additional_kwargs["reasoning"]739        assert set(reasoning.keys()) == {"id", "type", "summary"}740        summary = reasoning["summary"]741        assert isinstance(summary, list)742        for block in summary:743            assert isinstance(block, dict)744            assert isinstance(block["type"], str)745            assert isinstance(block["text"], str)746            assert block["text"]747    elif output_version == "responses/v1":748        reasoning = next(749            block750            for block in response_1.content751            if block["type"] == "reasoning"  # type: ignore[index]752        )753        if isinstance(reasoning, str):754            reasoning = json.loads(reasoning)755        assert set(reasoning.keys()) == {"id", "type", "summary", "index"}756        summary = reasoning["summary"]757        assert isinstance(summary, list)758        for block in summary:759            assert isinstance(block, dict)760            assert isinstance(block["type"], str)761            assert isinstance(block["text"], str)762            assert block["text"]763    else:764        # v1765        total_reasoning_blocks = 0766        for block in response_1.content_blocks:767            if block["type"] == "reasoning":768                total_reasoning_blocks += 1769                assert isinstance(block.get("id"), str)770                assert block.get("id", "").startswith("rs_")771                assert isinstance(block.get("reasoning"), str)772                assert isinstance(block.get("index"), str)773        assert (774            total_reasoning_blocks > 1775        )  # This query typically generates multiple reasoning blocks776777    # Check we can pass back summaries778    message_2 = {"role": "user", "content": "Thank you."}779    response_2 = llm.invoke([message_1, response_1, message_2])780    assert isinstance(response_2, AIMessage)781782783@pytest.mark.default_cassette("test_code_interpreter.yaml.gz")784@pytest.mark.vcr785@pytest.mark.parametrize(786    ("output_version", "use_v2_stream"),787    [788        ("v0", False),789        ("responses/v1", False),790        ("v1", False),791        ("v1", True),792    ],793)794def test_code_interpreter(795    output_version: Literal["v0", "responses/v1", "v1"], use_v2_stream: bool796) -> None:797    llm = ChatOpenAI(798        model="gpt-5-nano", use_responses_api=True, output_version=output_version799    )800    llm_with_tools = llm.bind_tools(801        [{"type": "code_interpreter", "container": {"type": "auto"}}]802    )803    input_message = {804        "role": "user",805        "content": "Write and run code to answer the question: what is 3^3?",806    }807    response = llm_with_tools.invoke([input_message])808    assert isinstance(response, AIMessage)809    _check_response(response)810    if output_version == "v0":811        tool_outputs = [812            item813            for item in response.additional_kwargs["tool_outputs"]814            if item["type"] == "code_interpreter_call"815        ]816        assert len(tool_outputs) == 1817    elif output_version == "responses/v1":818        tool_outputs = [819            item820            for item in response.content821            if isinstance(item, dict) and item["type"] == "code_interpreter_call"822        ]823        assert len(tool_outputs) == 1824    else:825        # v1826        tool_outputs = [827            item828            for item in response.content_blocks829            if item["type"] == "server_tool_call" and item["name"] == "code_interpreter"830        ]831        code_interpreter_result = next(832            item833            for item in response.content_blocks834            if item["type"] == "server_tool_result"835        )836        assert tool_outputs837        assert code_interpreter_result838    assert len(tool_outputs) == 1839840    # Test streaming841    # Use same container842    container_id = tool_outputs[0].get("container_id") or tool_outputs[0].get(843        "extras", {}844    ).get("container_id")845    llm_with_tools = llm.bind_tools(846        [{"type": "code_interpreter", "container": container_id}]847    )848849    full: BaseMessage850    if use_v2_stream:851        full = cast(852            "ChatModelStream",853            llm_with_tools.stream_events([input_message], version="v3"),854        ).output855    else:856        aggregated: BaseMessageChunk | None = None857        for chunk in llm_with_tools.stream([input_message]):858            assert isinstance(chunk, AIMessageChunk)859            aggregated = chunk if aggregated is None else aggregated + chunk860        assert isinstance(aggregated, AIMessageChunk)861        full = aggregated862    if output_version == "v0":863        tool_outputs = [864            item865            for item in response.additional_kwargs["tool_outputs"]866            if item["type"] == "code_interpreter_call"867        ]868        assert tool_outputs869    elif output_version == "responses/v1":870        tool_outputs = [871            item872            for item in response.content873            if isinstance(item, dict) and item["type"] == "code_interpreter_call"874        ]875        assert tool_outputs876    else:877        # v1878        code_interpreter_call = next(879            item880            for item in full.content_blocks881            if item["type"] == "server_tool_call" and item["name"] == "code_interpreter"882        )883        code_interpreter_result = next(884            item for item in full.content_blocks if item["type"] == "server_tool_result"885        )886        assert code_interpreter_call887        assert code_interpreter_result888889    # Test we can pass back in890    next_message = {"role": "user", "content": "Please add more comments to the code."}891    _ = llm_with_tools.invoke([input_message, full, next_message])892893894@pytest.mark.vcr895def test_mcp_builtin() -> None:896    llm = ChatOpenAI(model="gpt-5-nano", use_responses_api=True, output_version="v0")897898    llm_with_tools = llm.bind_tools(899        [900            {901                "type": "mcp",902                "server_label": "deepwiki",903                "server_url": "https://mcp.deepwiki.com/mcp",904                "require_approval": {"always": {"tool_names": ["read_wiki_structure"]}},905            }906        ]907    )908    input_message = {909        "role": "user",910        "content": (911            "What transport protocols does the 2025-03-26 version of the MCP spec "912            "support?"913        ),914    }915    response = llm_with_tools.invoke([input_message])916    assert all(isinstance(block, dict) for block in response.content)917918    approval_message = HumanMessage(919        [920            {921                "type": "mcp_approval_response",922                "approve": True,923                "approval_request_id": output["id"],924            }925            for output in response.additional_kwargs["tool_outputs"]926            if output["type"] == "mcp_approval_request"927        ]928    )929    _ = llm_with_tools.invoke(930        [approval_message], previous_response_id=response.response_metadata["id"]931    )932933934@pytest.mark.vcr935def test_mcp_builtin_zdr() -> None:936    llm = ChatOpenAI(937        model="gpt-5-nano",938        use_responses_api=True,939        store=False,940        include=["reasoning.encrypted_content"],941    )942943    llm_with_tools = llm.bind_tools(944        [945            {946                "type": "mcp",947                "server_label": "deepwiki",948                "server_url": "https://mcp.deepwiki.com/mcp",949                "allowed_tools": ["ask_question"],950                "require_approval": "always",951            }952        ]953    )954    input_message = {955        "role": "user",956        "content": (957            "What transport protocols does the 2025-03-26 version of the MCP "958            "spec (modelcontextprotocol/modelcontextprotocol) support?"959        ),960    }961    full: BaseMessageChunk | None = None962    for chunk in llm_with_tools.stream([input_message]):963        assert isinstance(chunk, AIMessageChunk)964        full = chunk if full is None else full + chunk965966    assert isinstance(full, AIMessageChunk)967    assert all(isinstance(block, dict) for block in full.content)968969    approval_message = HumanMessage(970        [971            {972                "type": "mcp_approval_response",973                "approve": True,974                "approval_request_id": block["id"],  # type: ignore[index]975            }976            for block in full.content977            if block["type"] == "mcp_approval_request"  # type: ignore[index]978        ]979    )980    result = llm_with_tools.invoke([input_message, full, approval_message])981    next_message = {"role": "user", "content": "Thanks!"}982    _ = llm_with_tools.invoke(983        [input_message, full, approval_message, result, next_message]984    )985986987@pytest.mark.default_cassette("test_mcp_builtin_zdr.yaml.gz")988@pytest.mark.vcr989@pytest.mark.parametrize("use_v2_stream", [False, True])990def test_mcp_builtin_zdr_v1(use_v2_stream: bool) -> None:991    llm = ChatOpenAI(992        model="gpt-5-nano",993        output_version="v1",994        store=False,995        include=["reasoning.encrypted_content"],996    )997998    llm_with_tools = llm.bind_tools(999        [1000            {1001                "type": "mcp",1002                "server_label": "deepwiki",1003                "server_url": "https://mcp.deepwiki.com/mcp",1004                "allowed_tools": ["ask_question"],1005                "require_approval": "always",1006            }1007        ]1008    )1009    input_message = {1010        "role": "user",1011        "content": (1012            "What transport protocols does the 2025-03-26 version of the MCP "1013            "spec (modelcontextprotocol/modelcontextprotocol) support?"1014        ),1015    }1016    full: BaseMessage1017    if use_v2_stream:1018        full = cast(1019            "ChatModelStream",1020            llm_with_tools.stream_events([input_message], version="v3"),1021        ).output1022    else:1023        aggregated: BaseMessageChunk | None = None1024        for chunk in llm_with_tools.stream([input_message]):1025            assert isinstance(chunk, AIMessageChunk)1026            aggregated = chunk if aggregated is None else aggregated + chunk1027        assert isinstance(aggregated, AIMessageChunk)1028        full = aggregated10291030    assert isinstance(full, AIMessage)1031    assert all(isinstance(block, dict) for block in full.content)10321033    approval_message = HumanMessage(1034        [1035            {1036                "type": "non_standard",1037                "value": {1038                    "type": "mcp_approval_response",1039                    "approve": True,1040                    "approval_request_id": block["value"]["id"],  # type: ignore[index]1041                },1042            }1043            for block in full.content_blocks1044            if block["type"] == "non_standard"1045            and block["value"]["type"] == "mcp_approval_request"  # type: ignore[index]1046        ]1047    )1048    result = llm_with_tools.invoke([input_message, full, approval_message])1049    next_message = {"role": "user", "content": "Thanks!"}1050    _ = llm_with_tools.invoke(1051        [input_message, full, approval_message, result, next_message]1052    )105310541055@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz")1056@pytest.mark.vcr1057@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])1058def test_image_generation_streaming(1059    output_version: Literal["v0", "responses/v1"],1060) -> None:1061    """Test image generation streaming."""1062    llm = ChatOpenAI(1063        model="gpt-4.1", use_responses_api=True, output_version=output_version1064    )1065    tool = {1066        "type": "image_generation",1067        # For testing purposes let's keep the quality low, so the test runs faster.1068        "quality": "low",1069        "output_format": "jpeg",1070        "output_compression": 100,1071        "size": "1024x1024",1072    }10731074    # Example tool output for an image1075    # {1076    #     "background": "opaque",1077    #     "id": "ig_683716a8ddf0819888572b20621c7ae4029ec8c11f8dacf8",1078    #     "output_format": "png",1079    #     "quality": "high",1080    #     "revised_prompt": "A fluffy, fuzzy cat sitting calmly, with soft fur, bright "1081    #     "eyes, and a cute, friendly expression. The background is "1082    #     "simple and light to emphasize the cat's texture and "1083    #     "fluffiness.",1084    #     "size": "1024x1024",1085    #     "status": "completed",1086    #     "type": "image_generation_call",1087    #     "result": # base64 encode image data1088    # }10891090    expected_keys = {1091        "id",1092        "index",1093        "background",1094        "output_format",1095        "quality",1096        "result",1097        "revised_prompt",1098        "size",1099        "status",1100        "type",1101    }11021103    full: BaseMessageChunk | None = None1104    for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]):1105        assert isinstance(chunk, AIMessageChunk)1106        full = chunk if full is None else full + chunk1107    complete_ai_message = cast(AIMessageChunk, full)1108    # At the moment, the streaming API does not pick up annotations fully.1109    # So the following check is commented out.1110    # _check_response(complete_ai_message)1111    if output_version == "v0":1112        assert complete_ai_message.additional_kwargs["tool_outputs"]1113        tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0]1114        assert set(tool_output.keys()).issubset(expected_keys)1115    else:1116        # "responses/v1"1117        tool_output = next(1118            block1119            for block in complete_ai_message.content1120            if isinstance(block, dict) and block["type"] == "image_generation_call"1121        )1122        assert set(tool_output.keys()).issubset(expected_keys)112311241125@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz")1126@pytest.mark.vcr1127def test_image_generation_streaming_v1() -> None:1128    """Test image generation streaming."""1129    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True, output_version="v1")1130    tool = {1131        "type": "image_generation",1132        "quality": "low",1133        "output_format": "jpeg",1134        "output_compression": 100,1135        "size": "1024x1024",1136    }11371138    standard_keys = {"type", "base64", "mime_type", "id", "index"}1139    extra_keys = {1140        "background",1141        "output_format",1142        "quality",1143        "revised_prompt",1144        "size",1145        "status",1146    }11471148    full: BaseMessageChunk | None = None1149    for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]):1150        assert isinstance(chunk, AIMessageChunk)1151        full = chunk if full is None else full + chunk1152    complete_ai_message = cast(AIMessageChunk, full)11531154    tool_output = next(1155        block1156        for block in complete_ai_message.content1157        if isinstance(block, dict) and block["type"] == "image"1158    )1159    assert set(standard_keys).issubset(tool_output.keys())1160    assert set(extra_keys).issubset(tool_output["extras"].keys())116111621163@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz")1164@pytest.mark.vcr1165@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])1166def test_image_generation_multi_turn(1167    output_version: Literal["v0", "responses/v1"],1168) -> None:1169    """Test multi-turn editing of image generation by passing in history."""1170    # Test multi-turn1171    llm = ChatOpenAI(1172        model="gpt-4.1", use_responses_api=True, output_version=output_version1173    )1174    # Test invocation1175    tool = {1176        "type": "image_generation",1177        # For testing purposes let's keep the quality low, so the test runs faster.1178        "quality": "low",1179        "output_format": "jpeg",1180        "output_compression": 100,1181        "size": "1024x1024",1182    }1183    llm_with_tools = llm.bind_tools([tool])11841185    chat_history: list[MessageLikeRepresentation] = [1186        {"role": "user", "content": "Draw a random short word in green font."}1187    ]1188    ai_message = llm_with_tools.invoke(chat_history)1189    assert isinstance(ai_message, AIMessage)1190    _check_response(ai_message)11911192    expected_keys = {1193        "id",1194        "background",1195        "output_format",1196        "quality",1197        "result",1198        "revised_prompt",1199        "size",1200        "status",1201        "type",1202    }12031204    if output_version == "v0":1205        tool_output = ai_message.additional_kwargs["tool_outputs"][0]1206        assert set(tool_output.keys()).issubset(expected_keys)1207    elif output_version == "responses/v1":1208        tool_output = next(1209            block1210            for block in ai_message.content1211            if isinstance(block, dict) and block["type"] == "image_generation_call"1212        )1213        assert set(tool_output.keys()).issubset(expected_keys)1214    else:1215        standard_keys = {"type", "base64", "id", "status"}1216        tool_output = next(1217            block1218            for block in ai_message.content1219            if isinstance(block, dict) and block["type"] == "image"1220        )1221        assert set(standard_keys).issubset(tool_output.keys())12221223    # Example tool output for an image (v0)1224    # {1225    #     "background": "opaque",1226    #     "id": "ig_683716a8ddf0819888572b20621c7ae4029ec8c11f8dacf8",1227    #     "output_format": "png",1228    #     "quality": "high",1229    #     "revised_prompt": "A fluffy, fuzzy cat sitting calmly, with soft fur, bright "1230    #     "eyes, and a cute, friendly expression. The background is "1231    #     "simple and light to emphasize the cat's texture and "1232    #     "fluffiness.",1233    #     "size": "1024x1024",1234    #     "status": "completed",1235    #     "type": "image_generation_call",1236    #     "result": # base64 encode image data1237    # }12381239    chat_history.extend(1240        [1241            # AI message with tool output1242            ai_message,1243            # New request1244            {1245                "role": "user",1246                "content": (1247                    "Now, change the font to blue. Keep the word and everything else "1248                    "the same."1249                ),1250            },1251        ]1252    )12531254    ai_message2 = llm_with_tools.invoke(chat_history)1255    assert isinstance(ai_message2, AIMessage)1256    _check_response(ai_message2)12571258    if output_version == "v0":1259        tool_output = ai_message2.additional_kwargs["tool_outputs"][0]1260        assert set(tool_output.keys()).issubset(expected_keys)1261    else:1262        # "responses/v1"1263        tool_output = next(1264            block1265            for block in ai_message2.content1266            if isinstance(block, dict) and block["type"] == "image_generation_call"1267        )1268        assert set(tool_output.keys()).issubset(expected_keys)126912701271@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz")1272@pytest.mark.vcr1273def test_image_generation_multi_turn_v1() -> None:1274    """Test multi-turn editing of image generation by passing in history."""1275    # Test multi-turn1276    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True, output_version="v1")1277    # Test invocation1278    tool = {1279        "type": "image_generation",1280        "quality": "low",1281        "output_format": "jpeg",1282        "output_compression": 100,1283        "size": "1024x1024",1284    }1285    llm_with_tools = llm.bind_tools([tool])12861287    chat_history: list[MessageLikeRepresentation] = [1288        {"role": "user", "content": "Draw a random short word in green font."}1289    ]1290    ai_message = llm_with_tools.invoke(chat_history)1291    assert isinstance(ai_message, AIMessage)1292    _check_response(ai_message)12931294    standard_keys = {"type", "base64", "mime_type", "id"}1295    extra_keys = {1296        "background",1297        "output_format",1298        "quality",1299        "revised_prompt",1300        "size",1301        "status",1302    }13031304    tool_output = next(1305        block1306        for block in ai_message.content1307        if isinstance(block, dict) and block["type"] == "image"1308    )1309    assert set(standard_keys).issubset(tool_output.keys())1310    assert set(extra_keys).issubset(tool_output["extras"].keys())13111312    chat_history.extend(1313        [1314            # AI message with tool output1315            ai_message,1316            # New request1317            {1318                "role": "user",1319                "content": (1320                    "Now, change the font to blue. Keep the word and everything else "1321                    "the same."1322                ),1323            },1324        ]1325    )13261327    ai_message2 = llm_with_tools.invoke(chat_history)1328    assert isinstance(ai_message2, AIMessage)1329    _check_response(ai_message2)13301331    tool_output = next(1332        block1333        for block in ai_message2.content1334        if isinstance(block, dict) and block["type"] == "image"1335    )1336    assert set(standard_keys).issubset(tool_output.keys())1337    assert set(extra_keys).issubset(tool_output["extras"].keys())133813391340def test_verbosity_parameter() -> None:1341    """Test verbosity parameter with Responses API.13421343    Tests that the verbosity parameter works correctly with the OpenAI Responses API.13441345    """1346    llm = ChatOpenAI(model=MODEL_NAME, verbosity="medium", use_responses_api=True)1347    response = llm.invoke([HumanMessage(content="Hello, explain quantum computing.")])13481349    assert isinstance(response, AIMessage)1350    assert response.content135113521353@pytest.mark.default_cassette("test_custom_tool.yaml.gz")1354@pytest.mark.vcr1355@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1356def test_custom_tool(output_version: Literal["responses/v1", "v1"]) -> None:1357    @custom_tool1358    def execute_code(code: str) -> str:1359        """Execute python code."""1360        return "27"13611362    llm = ChatOpenAI(model="gpt-5", output_version=output_version).bind_tools(1363        [execute_code]1364    )13651366    input_message = {"role": "user", "content": "Use the tool to evaluate 3^3."}1367    tool_call_message = llm.invoke([input_message])1368    assert isinstance(tool_call_message, AIMessage)1369    assert len(tool_call_message.tool_calls) == 11370    tool_call = tool_call_message.tool_calls[0]1371    tool_message = execute_code.invoke(tool_call)1372    response = llm.invoke([input_message, tool_call_message, tool_message])1373    assert isinstance(response, AIMessage)13741375    # Test streaming1376    full: BaseMessageChunk | None = None1377    for chunk in llm.stream([input_message]):1378        assert isinstance(chunk, AIMessageChunk)1379        full = chunk if full is None else full + chunk1380    assert isinstance(full, AIMessageChunk)1381    assert len(full.tool_calls) == 1138213831384@pytest.mark.default_cassette("test_compaction.yaml.gz")1385@pytest.mark.vcr1386@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1387def test_compaction(output_version: Literal["responses/v1", "v1"]) -> None:1388    """Test the compaction beta feature."""1389    llm = ChatOpenAI(1390        model="gpt-5.2",1391        context_management=[{"type": "compaction", "compact_threshold": 10_000}],1392        output_version=output_version,1393    )13941395    input_message = {1396        "role": "user",1397        "content": f"Generate a one-sentence summary of this:\n\n{'a' * 50000}",1398    }1399    messages: list = [input_message]14001401    first_response = llm.invoke(messages)1402    messages.append(first_response)14031404    second_message = {1405        "role": "user",1406        "content": f"Generate a one-sentence summary of this:\n\n{'b' * 50000}",1407    }1408    messages.append(second_message)14091410    second_response = llm.invoke(messages)1411    messages.append(second_response)14121413    content_blocks = second_response.content_blocks1414    compaction_block = next(1415        (block for block in content_blocks if block["type"] == "non_standard"),1416        None,1417    )1418    assert compaction_block1419    assert compaction_block["value"].get("type") == "compaction"14201421    third_message = {1422        "role": "user",1423        "content": "What are we talking about?",1424    }1425    messages.append(third_message)1426    third_response = llm.invoke(messages)1427    assert third_response.text142814291430@pytest.mark.default_cassette("test_compaction_streaming.yaml.gz")1431@pytest.mark.vcr1432@pytest.mark.parametrize(1433    ("output_version", "use_v2_stream"),1434    [1435        ("responses/v1", False),1436        ("v1", False),1437        ("v1", True),1438    ],1439)1440def test_compaction_streaming(1441    output_version: Literal["responses/v1", "v1"], use_v2_stream: bool1442) -> None:1443    """Test the compaction beta feature."""1444    llm = ChatOpenAI(1445        model="gpt-5.2",1446        context_management=[{"type": "compaction", "compact_threshold": 10_000}],1447        output_version=output_version,1448        streaming=True,1449    )14501451    def _run(messages: list) -> AIMessage:1452        if use_v2_stream:1453            return llm.stream_events(messages, version="v3").output1454        result = llm.invoke(messages)1455        assert isinstance(result, AIMessage)1456        return result14571458    input_message = {1459        "role": "user",1460        "content": f"Generate a one-sentence summary of this:\n\n{'a' * 50000}",1461    }1462    messages: list = [input_message]14631464    first_response = _run(messages)1465    messages.append(first_response)14661467    second_message = {1468        "role": "user",1469        "content": f"Generate a one-sentence summary of this:\n\n{'b' * 50000}",1470    }1471    messages.append(second_message)14721473    second_response = _run(messages)1474    messages.append(second_response)14751476    content_blocks = second_response.content_blocks1477    compaction_block = next(1478        (block for block in content_blocks if block["type"] == "non_standard"),1479        None,1480    )1481    assert compaction_block1482    assert compaction_block["value"].get("type") == "compaction"14831484    third_message = {1485        "role": "user",1486        "content": "What are we talking about?",1487    }1488    messages.append(third_message)1489    third_response = _run(messages)1490    assert third_response.text149114921493def test_csv_input() -> None:1494    """Test CSV file input with both LangChain standard and OpenAI native formats."""1495    # Create sample CSV content1496    csv_content = (1497        "name,age,city\nAlice,30,New York\nBob,25,Los Angeles\nCarol,35,Chicago"1498    )1499    csv_bytes = csv_content.encode("utf-8")1500    base64_string = base64.b64encode(csv_bytes).decode("utf-8")15011502    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)15031504    # Test LangChain standard format1505    langchain_message = {1506        "role": "user",1507        "content": [1508            {1509                "type": "text",1510                "text": "How many people are in this CSV file?",1511            },1512            {1513                "type": "file",1514                "base64": base64_string,1515                "mime_type": "text/csv",1516                "filename": "people.csv",1517            },1518        ],1519    }1520    payload = llm._get_request_payload([langchain_message])1521    block = payload["input"][0]["content"][1]1522    assert block["type"] == "input_file"15231524    response = llm.invoke([langchain_message])1525    assert isinstance(response, AIMessage)1526    assert response.content1527    assert (1528        "3" in str(response.content).lower() or "three" in str(response.content).lower()1529    )15301531    # Test OpenAI native format1532    openai_message = {1533        "role": "user",1534        "content": [1535            {1536                "type": "text",1537                "text": "How many people are in this CSV file?",1538            },1539            {1540                "type": "input_file",1541                "filename": "people.csv",1542                "file_data": f"data:text/csv;base64,{base64_string}",1543            },1544        ],1545    }1546    payload2 = llm._get_request_payload([openai_message])1547    block2 = payload2["input"][0]["content"][1]1548    assert block2["type"] == "input_file"15491550    response2 = llm.invoke([openai_message])1551    assert isinstance(response2, AIMessage)1552    assert response2.content1553    assert (1554        "3" in str(response2.content).lower()1555        or "three" in str(response2.content).lower()1556    )155715581559@pytest.mark.default_cassette("test_phase.yaml.gz")1560@pytest.mark.vcr1561@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1562def test_phase(output_version: str) -> None:1563    def get_weather(location: str) -> str:1564        """Get the weather at a location."""1565        return "It's sunny."15661567    model = ChatOpenAI(1568        model="gpt-5.4",1569        use_responses_api=True,1570        verbosity="high",1571        reasoning={"effort": "medium", "summary": "auto"},1572        output_version=output_version,1573    )15741575    agent = create_agent(model, tools=[get_weather])15761577    input_message = {1578        "role": "user",1579        "content": (1580            "What's the weather in the oldest major city in the US? State your answer "1581            "and then generate a tool call this turn."1582        ),1583    }1584    result = agent.invoke({"messages": [input_message]})1585    first_response = result["messages"][1]1586    text_block = next(1587        block for block in first_response.content if block["type"] == "text"1588    )1589    assert text_block["phase"] == "commentary"15901591    final_response = result["messages"][-1]1592    text_block = next(1593        block for block in final_response.content if block["type"] == "text"1594    )1595    assert text_block["phase"] == "final_answer"159615971598@pytest.mark.default_cassette("test_phase_streaming.yaml.gz")1599@pytest.mark.vcr1600@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1601def test_phase_streaming(output_version: str) -> None:1602    def get_weather(location: str) -> str:1603        """Get the weather at a location."""1604        return "It's sunny."16051606    model = ChatOpenAI(1607        model="gpt-5.4",1608        use_responses_api=True,1609        verbosity="high",1610        reasoning={"effort": "medium", "summary": "auto"},1611        streaming=True,1612        output_version=output_version,1613    )16141615    agent = create_agent(model, tools=[get_weather])16161617    input_message = {1618        "role": "user",1619        "content": (1620            "What's the weather in the oldest major city in the US? State your answer "1621            "and then generate a tool call this turn."1622        ),1623    }1624    result = agent.invoke({"messages": [input_message]})1625    first_response = result["messages"][1]1626    if output_version == "responses/v1":1627        assert [block["type"] for block in first_response.content] == [1628            "reasoning",1629            "text",1630            "function_call",1631        ]1632    else:1633        assert [block["type"] for block in first_response.content] == [1634            "reasoning",1635            "text",1636            "tool_call",1637        ]1638    text_block = next(1639        block for block in first_response.content if block["type"] == "text"1640    )1641    assert text_block["phase"] == "commentary"16421643    final_response = result["messages"][-1]1644    assert [block["type"] for block in final_response.content] == ["text"]1645    text_block = next(1646        block for block in final_response.content if block["type"] == "text"1647    )1648    assert text_block["phase"] == "final_answer"164916501651@pytest.mark.default_cassette("test_tool_search.yaml.gz")1652@pytest.mark.vcr1653@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1654def test_tool_search(output_version: str) -> None:1655    @tool(extras={"defer_loading": True})1656    def get_weather(location: str) -> str:1657        """Get the current weather for a location."""1658        return f"The weather in {location} is sunny and 72°F"16591660    @tool(extras={"defer_loading": True})1661    def get_recipe(query: str) -> None:1662        """Get a recipe for chicken soup."""16631664    model = ChatOpenAI(1665        model="gpt-5.4",1666        use_responses_api=True,1667        output_version=output_version,1668    )16691670    agent = create_agent(1671        model=model,1672        tools=[get_weather, get_recipe, {"type": "tool_search"}],1673    )1674    input_message = {"role": "user", "content": "What's the weather in San Francisco?"}1675    result = agent.invoke({"messages": [input_message]})1676    assert len(result["messages"]) == 41677    tool_call_message = result["messages"][1]1678    assert isinstance(tool_call_message, AIMessage)1679    assert tool_call_message.tool_calls1680    if output_version == "v1":1681        assert [block["type"] for block in tool_call_message.content] == [  # type: ignore[index]1682            "server_tool_call",1683            "server_tool_result",1684            "tool_call",1685        ]1686    else:1687        assert [block["type"] for block in tool_call_message.content] == [  # type: ignore[index]1688            "tool_search_call",1689            "tool_search_output",1690            "function_call",1691        ]16921693    assert isinstance(result["messages"][2], ToolMessage)16941695    assert result["messages"][3].text169616971698@pytest.mark.default_cassette("test_tool_search_streaming.yaml.gz")1699@pytest.mark.vcr1700@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1701def test_tool_search_streaming(output_version: str) -> None:1702    @tool(extras={"defer_loading": True})1703    def get_weather(location: str) -> str:1704        """Get the current weather for a location."""1705        return f"The weather in {location} is sunny and 72°F"17061707    @tool(extras={"defer_loading": True})1708    def get_recipe(query: str) -> None:1709        """Get a recipe for chicken soup."""17101711    model = ChatOpenAI(1712        model="gpt-5.4",1713        use_responses_api=True,1714        streaming=True,1715        output_version=output_version,1716    )17171718    agent = create_agent(1719        model=model,1720        tools=[get_weather, get_recipe, {"type": "tool_search"}],1721    )1722    input_message = {"role": "user", "content": "What's the weather in San Francisco?"}1723    result = agent.invoke({"messages": [input_message]})1724    assert len(result["messages"]) == 41725    tool_call_message = result["messages"][1]1726    assert isinstance(tool_call_message, AIMessage)1727    assert tool_call_message.tool_calls1728    if output_version == "v1":1729        assert [block["type"] for block in tool_call_message.content] == [  # type: ignore[index]1730            "server_tool_call",1731            "server_tool_result",1732            "tool_call",1733        ]1734    else:1735        assert [block["type"] for block in tool_call_message.content] == [  # type: ignore[index]1736            "tool_search_call",1737            "tool_search_output",1738            "function_call",1739        ]17401741    assert isinstance(result["messages"][2], ToolMessage)17421743    assert result["messages"][3].text174417451746@pytest.mark.vcr1747def test_client_executed_tool_search() -> None:1748    @tool1749    def get_weather(location: str) -> str:1750        """Get the current weather for a location."""1751        return f"The weather in {location} is sunny and 72°F"17521753    def search_tools(goal: str) -> list[dict]:1754        """Search for available tools to help answer the question."""1755        return [1756            {1757                "type": "function",1758                "defer_loading": True,1759                **convert_to_openai_tool(get_weather)["function"],1760            }1761        ]17621763    tool_search_schema = convert_to_openai_tool(search_tools, strict=True)1764    tool_search_config: dict = {1765        "type": "tool_search",1766        "execution": "client",1767        "description": tool_search_schema["function"]["description"],1768        "parameters": tool_search_schema["function"]["parameters"],1769    }17701771    class ClientToolSearchMiddleware(AgentMiddleware):1772        @hook_config(can_jump_to=["model"])1773        def after_model(self, state: AgentState, runtime: Any) -> dict[str, Any] | None:1774            last_message = state["messages"][-1]1775            if not isinstance(last_message, AIMessage):1776                return None1777            for block in last_message.content:1778                if isinstance(block, dict) and block.get("type") == "tool_search_call":1779                    call_id = block.get("call_id")1780                    args = block.get("arguments", {})1781                    goal = args.get("goal", "") if isinstance(args, dict) else ""1782                    loaded_tools = search_tools(goal)1783                    tool_search_output = {1784                        "type": "tool_search_output",1785                        "execution": "client",1786                        "call_id": call_id,1787                        "status": "completed",1788                        "tools": loaded_tools,1789                    }1790                    return {1791                        "messages": [HumanMessage(content=[tool_search_output])],1792                        "jump_to": "model",1793                    }1794            return None17951796        def wrap_tool_call(1797            self,1798            request: ToolCallRequest,1799            handler: Any,1800        ) -> Any:1801            if request.tool_call["name"] == "get_weather":1802                return handler(request.override(tool=get_weather))1803            return handler(request)18041805    llm = ChatOpenAI(model="gpt-5.4", use_responses_api=True)18061807    agent = create_agent(1808        model=llm,1809        tools=[tool_search_config],1810        middleware=[ClientToolSearchMiddleware()],1811    )18121813    result = agent.invoke(1814        {"messages": [HumanMessage("What's the weather in San Francisco?")]}1815    )1816    messages = result["messages"]1817    search_tool_call = messages[1]1818    assert search_tool_call.content[0]["type"] == "tool_search_call"18191820    search_tool_output = messages[2]1821    assert search_tool_output.content[0]["type"] == "tool_search_output"18221823    tool_call = messages[3]1824    assert tool_call.tool_calls18251826    assert isinstance(messages[4], ToolMessage)18271828    assert messages[5].text182918301831@pytest.mark.default_cassette("test_reasoning_text_v1_v2_parity.yaml.gz")1832@pytest.mark.vcr1833def test_reasoning_text_v1_v2_parity() -> None:1834    """`stream()` and `stream_events(version="v3")` agree on reasoning + text.18351836    Exercises the non-tool-call branch of the parity claim: a reasoning1837    model (`gpt-5-nano` via the Responses API) produces one or more1838    `reasoning` blocks followed by a `text` block. Both paths replay the1839    same recorded HTTP response (cassette with `allow_playback_repeats`),1840    so any remaining divergence is a library issue.1841    """1842    llm = ChatOpenAI(1843        model="gpt-5-nano",1844        reasoning={"effort": "low", "summary": "auto"},1845        output_version="v1",1846    )1847    prompt = {"role": "user", "content": "What is the capital of France?"}18481849    v1: AIMessageChunk | None = None1850    for chunk in llm.stream([prompt]):1851        assert isinstance(chunk, AIMessageChunk)1852        v1 = chunk if v1 is None else v1 + chunk1853    assert isinstance(v1, AIMessageChunk)18541855    stream = llm.stream_events([prompt], version="v3")1856    events = list(stream)1857    assert_valid_event_stream(events)1858    v2 = stream.output1859    assert isinstance(v2, AIMessage)18601861    # No tool calls on either path.1862    assert v1.tool_calls == v2.tool_calls == []1863    assert v1.invalid_tool_calls == v2.invalid_tool_calls == []1864    assert v1.additional_kwargs == v2.additional_kwargs18651866    # Content structure must match: same block sequence, same accumulated1867    # text and reasoning payloads, same block identifiers. `content_blocks`1868    # is the v1-shaped projection and is canonical for both paths.1869    assert v1.content_blocks == v2.content_blocks1870    assert v1.content == v2.content1871    # Sanity-check that we actually exercised the reasoning + text path.1872    block_types = [b["type"] for b in v1.content_blocks]1873    assert "reasoning" in block_types1874    assert "text" in block_types18751876    # Usage: core counts must match; provider detail subdicts are1877    # dropped by `_to_protocol_usage` because `langchain_protocol.UsageInfo`1878    # doesn't list them. Tracked as a protocol-repo change.1879    detail_keys = {"input_token_details", "output_token_details"}1880    v1_usage = {1881        k: v for k, v in (v1.usage_metadata or {}).items() if k not in detail_keys1882    }1883    v2_usage = {1884        k: v for k, v in (v2.usage_metadata or {}).items() if k not in detail_keys1885    }1886    assert v1_usage == v2_usage18871888    # Response metadata must match. The Responses API doesn't put1889    # `finish_reason` in per-chunk metadata, so neither the v1 reduction1890    # nor the v2 bridge ends up with one. (Protocol 0.0.10 dropped the1891    # v2 bridge's default `"stop"` synthesis; provider metadata now1892    # passes through unchanged.)1893    assert v1.response_metadata == v2.response_metadata

Code quality findings 100

Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response.content, list)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block, dict)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block.get("text"), str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(text_content, str)
Ensure functions have docstrings for documentation
missing-docstring
def test_incomplete_response() -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessageChunk)
Ensure functions have docstrings for documentation
missing-docstring
def test_web_search(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Ensure functions have docstrings for documentation
missing-docstring
async def test_web_search_async() -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(block, dict) and block["type"] == "apply_patch_call"
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(aggregated, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
isinstance(block, dict) and block["type"] == "apply_patch_call"
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(follow_up, AIMessage)
Ensure functions have docstrings for documentation
missing-docstring
def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Ensure functions have docstrings for documentation
missing-docstring
def test_agent_loop(output_version: Literal["responses/v1", "v1"]) -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(tool_call_message, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(tool_message, ToolMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response, AIMessage)
Ensure functions have docstrings for documentation
missing-docstring
def test_agent_loop_streaming(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(tool_call_message, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(tool_message, ToolMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(tool_call_message, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(tool_message, ToolMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response, AIMessage)
Ensure functions have docstrings for documentation
missing-docstring
def test_parsed_pydantic_schema(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessageChunk)
Ensure functions have docstrings for documentation
missing-docstring
async def test_parsed_pydantic_schema_async() -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessageChunk)
Ensure functions have docstrings for documentation
missing-docstring
def test_parsed_dict_schema(schema: Any) -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(parsed["response"], str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(parsed["response"], str)
Ensure functions have docstrings for documentation
missing-docstring
def test_parsed_strict() -> None:
Ensure functions have docstrings for documentation
missing-docstring
async def test_parsed_dict_schema_async(schema: Any) -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(parsed["response"], str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(parsed["response"], str)
Ensure functions have docstrings for documentation
missing-docstring
def test_function_calling_and_structured_output(schema: Any) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_reasoning(output_version: Literal["v0", "responses/v1", "v1"]) -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessage)
Ensure functions have docstrings for documentation
missing-docstring
def test_stateful_api() -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(second_response.content, list)
Ensure functions have docstrings for documentation
missing-docstring
def test_route_from_model_kwargs() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_computer_calls() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_file_search(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessageChunk)
Ensure functions have docstrings for documentation
missing-docstring
def test_stream_reasoning_summary(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(aggregated, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(summary, list)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block, dict)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block["type"], str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block["text"], str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(reasoning, str):
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(summary, list)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block, dict)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block["type"], str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block["text"], str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block.get("id"), str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block.get("reasoning"), str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(block.get("index"), str)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response_2, AIMessage)
Ensure functions have docstrings for documentation
missing-docstring
def test_code_interpreter(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(response, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(item, dict) and item["type"] == "code_interpreter_call"
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(aggregated, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(item, dict) and item["type"] == "code_interpreter_call"
Ensure functions have docstrings for documentation
missing-docstring
def test_mcp_builtin() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_mcp_builtin_zdr() -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessageChunk)
Ensure functions have docstrings for documentation
missing-docstring
def test_mcp_builtin_zdr_v1(use_v2_stream: bool) -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(aggregated, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(full, AIMessage)
Ensure functions have docstrings for documentation
missing-docstring
def test_image_generation_streaming(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(block, dict) and block["type"] == "image_generation_call"
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(chunk, AIMessageChunk)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(block, dict) and block["type"] == "image"
Ensure functions have docstrings for documentation
missing-docstring
def test_image_generation_multi_turn(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(ai_message, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(block, dict) and block["type"] == "image_generation_call"
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(block, dict) and block["type"] == "image"

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.