Overuse may indicate design issues; consider polymorphism
assert isinstance(response, AIMessage)
1"""Test Responses API usage."""23import base644import json5import os6from typing import TYPE_CHECKING, Annotated, Any, Literal, cast78import openai9import pytest10from langchain.agents import create_agent11from langchain.agents.middleware.types import (12 AgentMiddleware,13 AgentState,14 ToolCallRequest,15 hook_config,16)17from langchain_core.messages import (18 AIMessage,19 AIMessageChunk,20 BaseMessage,21 BaseMessageChunk,22 HumanMessage,23 MessageLikeRepresentation,24 ToolMessage,25)26from langchain_core.tools import tool27from langchain_core.utils.function_calling import convert_to_openai_tool28from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream29from pydantic import BaseModel30from typing_extensions import TypedDict3132from langchain_openai import ChatOpenAI, custom_tool33from langchain_openai.chat_models.base import _convert_to_openai_response_format3435if TYPE_CHECKING:36 from collections.abc import Awaitable3738 from langchain_core.language_models.chat_model_stream import (39 AsyncChatModelStream,40 ChatModelStream,41 )4243MODEL_NAME = "gpt-4o-mini"444546def _check_response(response: BaseMessage | None) -> None:47 assert isinstance(response, AIMessage)48 assert isinstance(response.content, list)49 for block in response.content:50 assert isinstance(block, dict)51 if block["type"] == "text":52 assert isinstance(block.get("text"), str)53 annotations = block.get("annotations", [])54 for annotation in annotations:55 if annotation["type"] == "file_citation":56 assert all(57 key in annotation58 for key in ["file_id", "filename", "file_index", "type"]59 )60 elif annotation["type"] == "web_search":61 assert all(62 key in annotation63 for key in ["end_index", "start_index", "title", "type", "url"]64 )65 elif annotation["type"] == "citation":66 assert all(key in annotation for key in ["title", "type"])67 if "url" in annotation:68 assert "start_index" in annotation69 assert "end_index" in annotation70 text_content = response.text # type: ignore[operator,misc]71 assert isinstance(text_content, str)72 assert text_content73 assert response.usage_metadata74 assert response.usage_metadata["input_tokens"] > 075 assert response.usage_metadata["output_tokens"] > 076 assert response.usage_metadata["total_tokens"] > 077 assert response.response_metadata["model_name"]78 assert response.response_metadata["service_tier"] # type: ignore[typeddict-item]798081@pytest.mark.vcr82def test_incomplete_response() -> None:83 model = ChatOpenAI(84 model=MODEL_NAME, use_responses_api=True, max_completion_tokens=1685 )86 response = model.invoke("Tell me a 100 word story about a bear.")87 assert response.response_metadata["incomplete_details"]88 assert response.response_metadata["incomplete_details"]["reason"]89 assert response.response_metadata["status"] == "incomplete"9091 full: AIMessageChunk | None = None92 for chunk in model.stream("Tell me a 100 word story about a bear."):93 assert isinstance(chunk, AIMessageChunk)94 full = chunk if full is None else full + chunk95 assert isinstance(full, AIMessageChunk)96 assert full.response_metadata["incomplete_details"]97 assert full.response_metadata["incomplete_details"]["reason"]98 assert full.response_metadata["status"] == "incomplete"99100101@pytest.mark.default_cassette("test_web_search.yaml.gz")102@pytest.mark.vcr103@pytest.mark.parametrize(104 ("output_version", "use_v2_stream"),105 [106 ("responses/v1", False),107 ("v1", False),108 ("v1", True),109 ],110)111def test_web_search(112 output_version: Literal["responses/v1", "v1"], use_v2_stream: bool113) -> None:114 llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)115 first_response = llm.invoke(116 "What was a positive news story from today?",117 tools=[{"type": "web_search_preview"}],118 )119 _check_response(first_response)120121 # Test streaming122 full: BaseMessage123 if use_v2_stream:124 full = llm.stream_events(125 "What was a positive news story from today?",126 tools=[{"type": "web_search_preview"}],127 version="v3",128 ).output129 else:130 aggregated: BaseMessageChunk | None = None131 for chunk in llm.stream(132 "What was a positive news story from today?",133 tools=[{"type": "web_search_preview"}],134 ):135 assert isinstance(chunk, AIMessageChunk)136 aggregated = chunk if aggregated is None else aggregated + chunk137 assert aggregated is not None138 full = aggregated139 _check_response(full)140141 # Use OpenAI's stateful API142 response = llm.invoke(143 "what about a negative one",144 tools=[{"type": "web_search_preview"}],145 previous_response_id=first_response.response_metadata["id"],146 )147 _check_response(response)148149 # Manually pass in chat history150 response = llm.invoke(151 [152 {"role": "user", "content": "What was a positive news story from today?"},153 first_response,154 {"role": "user", "content": "what about a negative one"},155 ],156 tools=[{"type": "web_search_preview"}],157 )158 _check_response(response)159160 # Bind tool161 response = llm.bind_tools([{"type": "web_search_preview"}]).invoke(162 "What was a positive news story from today?"163 )164 _check_response(response)165166 for msg in [first_response, full, response]:167 assert msg is not None168 block_types = [block["type"] for block in msg.content] # type: ignore[index]169 if output_version == "responses/v1":170 assert block_types == ["web_search_call", "text"]171 else:172 assert block_types == ["server_tool_call", "server_tool_result", "text"]173174175@pytest.mark.flaky(retries=3, delay=1)176async def test_web_search_async() -> None:177 llm = ChatOpenAI(model=MODEL_NAME, output_version="v0")178 response = await llm.ainvoke(179 "What was a positive news story from today?",180 tools=[{"type": "web_search_preview"}],181 )182 _check_response(response)183 assert response.response_metadata["status"]184185 # Test streaming186 full: BaseMessageChunk | None = None187 async for chunk in llm.astream(188 "What was a positive news story from today?",189 tools=[{"type": "web_search_preview"}],190 ):191 assert isinstance(chunk, AIMessageChunk)192 full = chunk if full is None else full + chunk193 assert isinstance(full, AIMessageChunk)194 _check_response(full)195196 for msg in [response, full]:197 assert msg.additional_kwargs["tool_outputs"]198 assert len(msg.additional_kwargs["tool_outputs"]) == 1199 tool_output = msg.additional_kwargs["tool_outputs"][0]200 assert tool_output["type"] == "web_search_call"201202203@pytest.mark.default_cassette("test_apply_patch.yaml.gz")204@pytest.mark.vcr205def test_apply_patch() -> None:206 """Test the apply_patch built-in tool end-to-end.207208 apply_patch is a client-executed tool: the model proposes a file operation209 via an `apply_patch_call` block, the client applies it, and the result is210 returned as an `apply_patch_call_output` block. Requires a model that211 supports the tool.212 """213 prompt = "Create a new file named hello.txt containing the line: hello world"214 llm = ChatOpenAI(model="gpt-5.1", output_version="responses/v1")215 tool = {"type": "apply_patch"}216217 # Non-streaming: the model should emit an apply_patch_call block.218 response = llm.invoke(prompt, tools=[tool])219 assert isinstance(response, AIMessage)220 calls = [221 block222 for block in response.content223 if isinstance(block, dict) and block["type"] == "apply_patch_call"224 ]225 assert len(calls) == 1226 call = calls[0]227 assert call["call_id"]228 assert call["operation"]["type"] in ("create_file", "update_file", "delete_file")229230 # Streaming: the apply_patch_call block survives chunk aggregation.231 aggregated: BaseMessageChunk | None = None232 for chunk in llm.stream(prompt, tools=[tool]):233 assert isinstance(chunk, AIMessageChunk)234 aggregated = chunk if aggregated is None else aggregated + chunk235 assert isinstance(aggregated, AIMessageChunk)236 assert any(237 isinstance(block, dict) and block["type"] == "apply_patch_call"238 for block in aggregated.content239 )240241 # Round-trip: return an apply_patch_call_output and continue the conversation.242 output_message = HumanMessage(243 content=[244 {245 "type": "apply_patch_call_output",246 "call_id": call["call_id"],247 "status": "completed",248 "output": f"Created {call['operation']['path']}",249 }250 ]251 )252 follow_up = llm.invoke(253 [HumanMessage(prompt), response, output_message],254 tools=[tool],255 )256 assert isinstance(follow_up, AIMessage)257258259@pytest.mark.default_cassette("test_function_calling.yaml.gz")260@pytest.mark.vcr261@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])262def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -> None:263 def multiply(x: int, y: int) -> int:264 """return x * y"""265 return x * y266267 llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)268 bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])269 ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))270 assert len(ai_msg.tool_calls) == 1271 assert ai_msg.tool_calls[0]["name"] == "multiply"272 assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}273274 full: Any = None275 for chunk in bound_llm.stream("whats 5 * 4"):276 assert isinstance(chunk, AIMessageChunk)277 full = chunk if full is None else full + chunk278 assert len(full.tool_calls) == 1279 assert full.tool_calls[0]["name"] == "multiply"280 assert set(full.tool_calls[0]["args"]) == {"x", "y"}281282 for msg in [ai_msg, full]:283 assert len(msg.content_blocks) == 1284 assert msg.content_blocks[0]["type"] == "tool_call"285286 response = bound_llm.invoke("What was a positive news story from today?")287 _check_response(response)288289290@pytest.mark.default_cassette("test_agent_loop.yaml.gz")291@pytest.mark.vcr292@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])293def test_agent_loop(output_version: Literal["responses/v1", "v1"]) -> None:294 @tool295 def get_weather(location: str) -> str:296 """Get the weather for a location."""297 return "It's sunny."298299 llm = ChatOpenAI(300 model="gpt-5.4",301 use_responses_api=True,302 output_version=output_version,303 )304 llm_with_tools = llm.bind_tools([get_weather])305 input_message = HumanMessage("What is the weather in San Francisco, CA?")306 tool_call_message = llm_with_tools.invoke([input_message])307 assert isinstance(tool_call_message, AIMessage)308 tool_calls = tool_call_message.tool_calls309 assert len(tool_calls) == 1310 tool_call = tool_calls[0]311 tool_message = get_weather.invoke(tool_call)312 assert isinstance(tool_message, ToolMessage)313 response = llm_with_tools.invoke(314 [315 input_message,316 tool_call_message,317 tool_message,318 ]319 )320 assert isinstance(response, AIMessage)321322323@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")324@pytest.mark.vcr325@pytest.mark.parametrize(326 ("output_version", "use_v2_stream"),327 [328 ("responses/v1", False),329 ("responses/v1", True),330 ("v1", False),331 ("v1", True),332 ],333)334def test_agent_loop_streaming(335 output_version: Literal["responses/v1", "v1"], use_v2_stream: bool336) -> None:337 @tool338 def get_weather(location: str) -> str:339 """Get the weather for a location."""340 return "It's sunny."341342 llm = ChatOpenAI(343 model="gpt-5.2",344 use_responses_api=True,345 reasoning={"effort": "medium", "summary": "auto"},346 streaming=True,347 output_version=output_version,348 )349 llm_with_tools = llm.bind_tools([get_weather])350 input_message = HumanMessage("What is the weather in San Francisco, CA?")351 if use_v2_stream:352 tool_call_message = cast(353 "ChatModelStream",354 llm_with_tools.stream_events([input_message], version="v3"),355 ).output356 else:357 tool_call_message = llm_with_tools.invoke([input_message])358 assert isinstance(tool_call_message, AIMessage)359 tool_calls = tool_call_message.tool_calls360 assert len(tool_calls) == 1361 tool_call = tool_calls[0]362 tool_message = get_weather.invoke(tool_call)363 assert isinstance(tool_message, ToolMessage)364 if use_v2_stream:365 response = cast(366 "ChatModelStream",367 llm_with_tools.stream_events(368 [input_message, tool_call_message, tool_message],369 version="v3",370 ),371 ).output372 else:373 response = llm_with_tools.invoke(374 [375 input_message,376 tool_call_message,377 tool_message,378 ]379 )380 assert isinstance(response, AIMessage)381382383@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")384@pytest.mark.vcr385async def test_agent_loop_streaming_astream_events_v3_v1() -> None:386 """Async multi-turn through `astream_events(version="v3")`.387388 Mirrors `test_agent_loop_streaming` for `output_version="v1"` but389 exercises `AsyncChatModelStream` end-to-end: aggregation in the390 async state machine, async projections, and the background391 producer task. Cassette byte-matches guarantee the aggregated392 message serializes identically to the legacy path on the393 follow-up turn.394 """395396 @tool397 def get_weather(location: str) -> str:398 """Get the weather for a location."""399 return "It's sunny."400401 llm = ChatOpenAI(402 model="gpt-5.2",403 use_responses_api=True,404 reasoning={"effort": "medium", "summary": "auto"},405 streaming=True,406 output_version="v1",407 )408 llm_with_tools = llm.bind_tools([get_weather])409 input_message = HumanMessage("What is the weather in San Francisco, CA?")410 stream = await cast(411 "Awaitable[AsyncChatModelStream]",412 llm_with_tools.astream_events([input_message], version="v3"),413 )414 tool_call_message = await stream415 assert isinstance(tool_call_message, AIMessage)416 tool_calls = tool_call_message.tool_calls417 assert len(tool_calls) == 1418 tool_call = tool_calls[0]419 tool_message = get_weather.invoke(tool_call)420 assert isinstance(tool_message, ToolMessage)421 stream = await cast(422 "Awaitable[AsyncChatModelStream]",423 llm_with_tools.astream_events(424 [input_message, tool_call_message, tool_message],425 version="v3",426 ),427 )428 response = await stream429 assert isinstance(response, AIMessage)430431432class Foo(BaseModel):433 response: str434435436class FooDict(TypedDict):437 response: str438439440@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz")441@pytest.mark.vcr442@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])443def test_parsed_pydantic_schema(444 output_version: Literal["v0", "responses/v1", "v1"],445) -> None:446 llm = ChatOpenAI(447 model=MODEL_NAME, use_responses_api=True, output_version=output_version448 )449 response = llm.invoke("how are ya", response_format=Foo)450 parsed = Foo(**json.loads(response.text))451 assert parsed == response.additional_kwargs["parsed"]452 assert parsed.response453454 # Test stream455 full: BaseMessageChunk | None = None456 for chunk in llm.stream("how are ya", response_format=Foo):457 assert isinstance(chunk, AIMessageChunk)458 full = chunk if full is None else full + chunk459 assert isinstance(full, AIMessageChunk)460 parsed = Foo(**json.loads(full.text))461 assert parsed == full.additional_kwargs["parsed"]462 assert parsed.response463464465async def test_parsed_pydantic_schema_async() -> None:466 llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)467 response = await llm.ainvoke("how are ya", response_format=Foo)468 parsed = Foo(**json.loads(response.text))469 assert parsed == response.additional_kwargs["parsed"]470 assert parsed.response471472 # Test stream473 full: BaseMessageChunk | None = None474 async for chunk in llm.astream("how are ya", response_format=Foo):475 assert isinstance(chunk, AIMessageChunk)476 full = chunk if full is None else full + chunk477 assert isinstance(full, AIMessageChunk)478 parsed = Foo(**json.loads(full.text))479 assert parsed == full.additional_kwargs["parsed"]480 assert parsed.response481482483@pytest.mark.flaky(retries=3, delay=1)484@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])485def test_parsed_dict_schema(schema: Any) -> None:486 llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)487 response = llm.invoke("how are ya", response_format=schema)488 parsed = json.loads(response.text)489 assert parsed == response.additional_kwargs["parsed"]490 assert parsed["response"]491 assert isinstance(parsed["response"], str)492493 # Test stream494 full: BaseMessageChunk | None = None495 for chunk in llm.stream("how are ya", response_format=schema):496 assert isinstance(chunk, AIMessageChunk)497 full = chunk if full is None else full + chunk498 assert isinstance(full, AIMessageChunk)499 parsed = json.loads(full.text)500 assert parsed == full.additional_kwargs["parsed"]501 assert parsed["response"]502 assert isinstance(parsed["response"], str)503504505def test_parsed_strict() -> None:506 llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)507508 class Joke(TypedDict):509 setup: Annotated[str, ..., "The setup of the joke"]510 punchline: Annotated[str, None, "The punchline of the joke"]511512 schema = _convert_to_openai_response_format(Joke)513 invalid_schema = cast(dict, _convert_to_openai_response_format(Joke, strict=True))514 invalid_schema["json_schema"]["schema"]["required"] = ["setup"] # make invalid515516 # Test not strict517 response = llm.invoke("Tell me a joke", response_format=schema)518 parsed = json.loads(response.text)519 assert parsed == response.additional_kwargs["parsed"]520521 # Test strict522 with pytest.raises(openai.BadRequestError):523 llm.invoke(524 "Tell me a joke about cats.", response_format=invalid_schema, strict=True525 )526 with pytest.raises(openai.BadRequestError):527 next(528 llm.stream(529 "Tell me a joke about cats.",530 response_format=invalid_schema,531 strict=True,532 )533 )534535536@pytest.mark.flaky(retries=3, delay=1)537@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])538async def test_parsed_dict_schema_async(schema: Any) -> None:539 llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)540 response = await llm.ainvoke("how are ya", response_format=schema)541 parsed = json.loads(response.text)542 assert parsed == response.additional_kwargs["parsed"]543 assert parsed["response"]544 assert isinstance(parsed["response"], str)545546 # Test stream547 full: BaseMessageChunk | None = None548 async for chunk in llm.astream("how are ya", response_format=schema):549 assert isinstance(chunk, AIMessageChunk)550 full = chunk if full is None else full + chunk551 assert isinstance(full, AIMessageChunk)552 parsed = json.loads(full.text)553 assert parsed == full.additional_kwargs["parsed"]554 assert parsed["response"]555 assert isinstance(parsed["response"], str)556557558@pytest.mark.parametrize("schema", [Foo, Foo.model_json_schema(), FooDict])559def test_function_calling_and_structured_output(schema: Any) -> None:560 def multiply(x: int, y: int) -> int:561 """return x * y"""562 return x * y563564 llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)565 bound_llm = llm.bind_tools([multiply], response_format=schema, strict=True)566 # Test structured output567 response = llm.invoke("how are ya", response_format=schema)568 if schema == Foo:569 parsed = schema(**json.loads(response.text))570 assert parsed.response571 else:572 parsed = json.loads(response.text)573 assert parsed["response"]574 assert parsed == response.additional_kwargs["parsed"]575576 # Test function calling577 ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))578 assert len(ai_msg.tool_calls) == 1579 assert ai_msg.tool_calls[0]["name"] == "multiply"580 assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}581582583@pytest.mark.default_cassette("test_reasoning.yaml.gz")584@pytest.mark.vcr585@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])586def test_reasoning(output_version: Literal["v0", "responses/v1", "v1"]) -> None:587 llm = ChatOpenAI(588 model="gpt-5-nano", use_responses_api=True, output_version=output_version589 )590 response = llm.invoke("Hello", reasoning={"effort": "low"})591 assert isinstance(response, AIMessage)592593 # Test init params + streaming594 llm = ChatOpenAI(595 model="gpt-5-nano", reasoning={"effort": "low"}, output_version=output_version596 )597 full: BaseMessageChunk | None = None598 for chunk in llm.stream("Hello"):599 assert isinstance(chunk, AIMessageChunk)600 full = chunk if full is None else full + chunk601 assert isinstance(full, AIMessage)602603 for msg in [response, full]:604 if output_version == "v0":605 assert msg.additional_kwargs["reasoning"]606 else:607 block_types = [block["type"] for block in msg.content]608 assert block_types == ["reasoning", "text"]609610611def test_stateful_api() -> None:612 llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)613 response = llm.invoke("how are you, my name is Bobo")614 assert "id" in response.response_metadata615616 second_response = llm.invoke(617 "what's my name", previous_response_id=response.response_metadata["id"]618 )619 assert isinstance(second_response.content, list)620 assert "bobo" in second_response.content[0]["text"].lower() # type: ignore621622623def test_route_from_model_kwargs() -> None:624 llm = ChatOpenAI(625 model=MODEL_NAME, model_kwargs={"text": {"format": {"type": "text"}}}626 )627 _ = next(llm.stream("Hello"))628629630@pytest.mark.flaky(retries=3, delay=1)631def test_computer_calls() -> None:632 llm = ChatOpenAI(model="gpt-5.4")633 tool = {"type": "computer"}634 llm_with_tools = llm.bind_tools([tool], tool_choice="any")635 response = llm_with_tools.invoke("Please open the browser.")636 assert any(block["type"] == "computer_call" for block in response.content) # type: ignore[index]637638639@pytest.mark.default_cassette("test_file_search.yaml.gz")640@pytest.mark.vcr641@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])642def test_file_search(643 output_version: Literal["responses/v1", "v1"],644) -> None:645 vector_store_id = os.getenv("OPENAI_VECTOR_STORE_ID")646 if not vector_store_id:647 pytest.skip()648649 llm = ChatOpenAI(650 model=MODEL_NAME,651 use_responses_api=True,652 output_version=output_version,653 )654 tool = {655 "type": "file_search",656 "vector_store_ids": [vector_store_id],657 }658659 input_message = {"role": "user", "content": "What is deep research by OpenAI?"}660 response = llm.invoke([input_message], tools=[tool])661 _check_response(response)662663 if output_version == "v1":664 assert [block["type"] for block in response.content] == [ # type: ignore[index]665 "server_tool_call",666 "server_tool_result",667 "text",668 ]669 else:670 assert [block["type"] for block in response.content] == [ # type: ignore[index]671 "file_search_call",672 "text",673 ]674675 full: AIMessageChunk | None = None676 for chunk in llm.stream([input_message], tools=[tool]):677 assert isinstance(chunk, AIMessageChunk)678 full = chunk if full is None else full + chunk679 assert isinstance(full, AIMessageChunk)680 _check_response(full)681682 if output_version == "v1":683 assert [block["type"] for block in full.content] == [ # type: ignore[index]684 "server_tool_call",685 "server_tool_result",686 "text",687 ]688 else:689 assert [block["type"] for block in full.content] == ["file_search_call", "text"] # type: ignore[index]690691 next_message = {"role": "user", "content": "Thank you."}692 _ = llm.invoke([input_message, full, next_message])693694 for message in [response, full]:695 assert [block["type"] for block in message.content_blocks] == [696 "server_tool_call",697 "server_tool_result",698 "text",699 ]700701702@pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz")703@pytest.mark.vcr704@pytest.mark.parametrize(705 ("output_version", "use_v2_stream"),706 [707 ("v0", False),708 ("responses/v1", False),709 ("v1", False),710 ("v1", True),711 ],712)713def test_stream_reasoning_summary(714 output_version: Literal["v0", "responses/v1", "v1"],715 use_v2_stream: bool,716) -> None:717 llm = ChatOpenAI(718 model="gpt-5-nano",719 # Routes to Responses API if `reasoning` is set.720 reasoning={"effort": "medium", "summary": "auto"},721 output_version=output_version,722 )723 message_1 = {724 "role": "user",725 "content": "What was the third tallest buliding in the year 2000?",726 }727 response_1: BaseMessage728 if use_v2_stream:729 response_1 = llm.stream_events([message_1], version="v3").output730 else:731 aggregated: BaseMessageChunk | None = None732 for chunk in llm.stream([message_1]):733 assert isinstance(chunk, AIMessageChunk)734 aggregated = chunk if aggregated is None else aggregated + chunk735 assert isinstance(aggregated, AIMessageChunk)736 response_1 = aggregated737 if output_version == "v0":738 reasoning = response_1.additional_kwargs["reasoning"]739 assert set(reasoning.keys()) == {"id", "type", "summary"}740 summary = reasoning["summary"]741 assert isinstance(summary, list)742 for block in summary:743 assert isinstance(block, dict)744 assert isinstance(block["type"], str)745 assert isinstance(block["text"], str)746 assert block["text"]747 elif output_version == "responses/v1":748 reasoning = next(749 block750 for block in response_1.content751 if block["type"] == "reasoning" # type: ignore[index]752 )753 if isinstance(reasoning, str):754 reasoning = json.loads(reasoning)755 assert set(reasoning.keys()) == {"id", "type", "summary", "index"}756 summary = reasoning["summary"]757 assert isinstance(summary, list)758 for block in summary:759 assert isinstance(block, dict)760 assert isinstance(block["type"], str)761 assert isinstance(block["text"], str)762 assert block["text"]763 else:764 # v1765 total_reasoning_blocks = 0766 for block in response_1.content_blocks:767 if block["type"] == "reasoning":768 total_reasoning_blocks += 1769 assert isinstance(block.get("id"), str)770 assert block.get("id", "").startswith("rs_")771 assert isinstance(block.get("reasoning"), str)772 assert isinstance(block.get("index"), str)773 assert (774 total_reasoning_blocks > 1775 ) # This query typically generates multiple reasoning blocks776777 # Check we can pass back summaries778 message_2 = {"role": "user", "content": "Thank you."}779 response_2 = llm.invoke([message_1, response_1, message_2])780 assert isinstance(response_2, AIMessage)781782783@pytest.mark.default_cassette("test_code_interpreter.yaml.gz")784@pytest.mark.vcr785@pytest.mark.parametrize(786 ("output_version", "use_v2_stream"),787 [788 ("v0", False),789 ("responses/v1", False),790 ("v1", False),791 ("v1", True),792 ],793)794def test_code_interpreter(795 output_version: Literal["v0", "responses/v1", "v1"], use_v2_stream: bool796) -> None:797 llm = ChatOpenAI(798 model="gpt-5-nano", use_responses_api=True, output_version=output_version799 )800 llm_with_tools = llm.bind_tools(801 [{"type": "code_interpreter", "container": {"type": "auto"}}]802 )803 input_message = {804 "role": "user",805 "content": "Write and run code to answer the question: what is 3^3?",806 }807 response = llm_with_tools.invoke([input_message])808 assert isinstance(response, AIMessage)809 _check_response(response)810 if output_version == "v0":811 tool_outputs = [812 item813 for item in response.additional_kwargs["tool_outputs"]814 if item["type"] == "code_interpreter_call"815 ]816 assert len(tool_outputs) == 1817 elif output_version == "responses/v1":818 tool_outputs = [819 item820 for item in response.content821 if isinstance(item, dict) and item["type"] == "code_interpreter_call"822 ]823 assert len(tool_outputs) == 1824 else:825 # v1826 tool_outputs = [827 item828 for item in response.content_blocks829 if item["type"] == "server_tool_call" and item["name"] == "code_interpreter"830 ]831 code_interpreter_result = next(832 item833 for item in response.content_blocks834 if item["type"] == "server_tool_result"835 )836 assert tool_outputs837 assert code_interpreter_result838 assert len(tool_outputs) == 1839840 # Test streaming841 # Use same container842 container_id = tool_outputs[0].get("container_id") or tool_outputs[0].get(843 "extras", {}844 ).get("container_id")845 llm_with_tools = llm.bind_tools(846 [{"type": "code_interpreter", "container": container_id}]847 )848849 full: BaseMessage850 if use_v2_stream:851 full = cast(852 "ChatModelStream",853 llm_with_tools.stream_events([input_message], version="v3"),854 ).output855 else:856 aggregated: BaseMessageChunk | None = None857 for chunk in llm_with_tools.stream([input_message]):858 assert isinstance(chunk, AIMessageChunk)859 aggregated = chunk if aggregated is None else aggregated + chunk860 assert isinstance(aggregated, AIMessageChunk)861 full = aggregated862 if output_version == "v0":863 tool_outputs = [864 item865 for item in response.additional_kwargs["tool_outputs"]866 if item["type"] == "code_interpreter_call"867 ]868 assert tool_outputs869 elif output_version == "responses/v1":870 tool_outputs = [871 item872 for item in response.content873 if isinstance(item, dict) and item["type"] == "code_interpreter_call"874 ]875 assert tool_outputs876 else:877 # v1878 code_interpreter_call = next(879 item880 for item in full.content_blocks881 if item["type"] == "server_tool_call" and item["name"] == "code_interpreter"882 )883 code_interpreter_result = next(884 item for item in full.content_blocks if item["type"] == "server_tool_result"885 )886 assert code_interpreter_call887 assert code_interpreter_result888889 # Test we can pass back in890 next_message = {"role": "user", "content": "Please add more comments to the code."}891 _ = llm_with_tools.invoke([input_message, full, next_message])892893894@pytest.mark.vcr895def test_mcp_builtin() -> None:896 llm = ChatOpenAI(model="gpt-5-nano", use_responses_api=True, output_version="v0")897898 llm_with_tools = llm.bind_tools(899 [900 {901 "type": "mcp",902 "server_label": "deepwiki",903 "server_url": "https://mcp.deepwiki.com/mcp",904 "require_approval": {"always": {"tool_names": ["read_wiki_structure"]}},905 }906 ]907 )908 input_message = {909 "role": "user",910 "content": (911 "What transport protocols does the 2025-03-26 version of the MCP spec "912 "support?"913 ),914 }915 response = llm_with_tools.invoke([input_message])916 assert all(isinstance(block, dict) for block in response.content)917918 approval_message = HumanMessage(919 [920 {921 "type": "mcp_approval_response",922 "approve": True,923 "approval_request_id": output["id"],924 }925 for output in response.additional_kwargs["tool_outputs"]926 if output["type"] == "mcp_approval_request"927 ]928 )929 _ = llm_with_tools.invoke(930 [approval_message], previous_response_id=response.response_metadata["id"]931 )932933934@pytest.mark.vcr935def test_mcp_builtin_zdr() -> None:936 llm = ChatOpenAI(937 model="gpt-5-nano",938 use_responses_api=True,939 store=False,940 include=["reasoning.encrypted_content"],941 )942943 llm_with_tools = llm.bind_tools(944 [945 {946 "type": "mcp",947 "server_label": "deepwiki",948 "server_url": "https://mcp.deepwiki.com/mcp",949 "allowed_tools": ["ask_question"],950 "require_approval": "always",951 }952 ]953 )954 input_message = {955 "role": "user",956 "content": (957 "What transport protocols does the 2025-03-26 version of the MCP "958 "spec (modelcontextprotocol/modelcontextprotocol) support?"959 ),960 }961 full: BaseMessageChunk | None = None962 for chunk in llm_with_tools.stream([input_message]):963 assert isinstance(chunk, AIMessageChunk)964 full = chunk if full is None else full + chunk965966 assert isinstance(full, AIMessageChunk)967 assert all(isinstance(block, dict) for block in full.content)968969 approval_message = HumanMessage(970 [971 {972 "type": "mcp_approval_response",973 "approve": True,974 "approval_request_id": block["id"], # type: ignore[index]975 }976 for block in full.content977 if block["type"] == "mcp_approval_request" # type: ignore[index]978 ]979 )980 result = llm_with_tools.invoke([input_message, full, approval_message])981 next_message = {"role": "user", "content": "Thanks!"}982 _ = llm_with_tools.invoke(983 [input_message, full, approval_message, result, next_message]984 )985986987@pytest.mark.default_cassette("test_mcp_builtin_zdr.yaml.gz")988@pytest.mark.vcr989@pytest.mark.parametrize("use_v2_stream", [False, True])990def test_mcp_builtin_zdr_v1(use_v2_stream: bool) -> None:991 llm = ChatOpenAI(992 model="gpt-5-nano",993 output_version="v1",994 store=False,995 include=["reasoning.encrypted_content"],996 )997998 llm_with_tools = llm.bind_tools(999 [1000 {1001 "type": "mcp",1002 "server_label": "deepwiki",1003 "server_url": "https://mcp.deepwiki.com/mcp",1004 "allowed_tools": ["ask_question"],1005 "require_approval": "always",1006 }1007 ]1008 )1009 input_message = {1010 "role": "user",1011 "content": (1012 "What transport protocols does the 2025-03-26 version of the MCP "1013 "spec (modelcontextprotocol/modelcontextprotocol) support?"1014 ),1015 }1016 full: BaseMessage1017 if use_v2_stream:1018 full = cast(1019 "ChatModelStream",1020 llm_with_tools.stream_events([input_message], version="v3"),1021 ).output1022 else:1023 aggregated: BaseMessageChunk | None = None1024 for chunk in llm_with_tools.stream([input_message]):1025 assert isinstance(chunk, AIMessageChunk)1026 aggregated = chunk if aggregated is None else aggregated + chunk1027 assert isinstance(aggregated, AIMessageChunk)1028 full = aggregated10291030 assert isinstance(full, AIMessage)1031 assert all(isinstance(block, dict) for block in full.content)10321033 approval_message = HumanMessage(1034 [1035 {1036 "type": "non_standard",1037 "value": {1038 "type": "mcp_approval_response",1039 "approve": True,1040 "approval_request_id": block["value"]["id"], # type: ignore[index]1041 },1042 }1043 for block in full.content_blocks1044 if block["type"] == "non_standard"1045 and block["value"]["type"] == "mcp_approval_request" # type: ignore[index]1046 ]1047 )1048 result = llm_with_tools.invoke([input_message, full, approval_message])1049 next_message = {"role": "user", "content": "Thanks!"}1050 _ = llm_with_tools.invoke(1051 [input_message, full, approval_message, result, next_message]1052 )105310541055@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz")1056@pytest.mark.vcr1057@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])1058def test_image_generation_streaming(1059 output_version: Literal["v0", "responses/v1"],1060) -> None:1061 """Test image generation streaming."""1062 llm = ChatOpenAI(1063 model="gpt-4.1", use_responses_api=True, output_version=output_version1064 )1065 tool = {1066 "type": "image_generation",1067 # For testing purposes let's keep the quality low, so the test runs faster.1068 "quality": "low",1069 "output_format": "jpeg",1070 "output_compression": 100,1071 "size": "1024x1024",1072 }10731074 # Example tool output for an image1075 # {1076 # "background": "opaque",1077 # "id": "ig_683716a8ddf0819888572b20621c7ae4029ec8c11f8dacf8",1078 # "output_format": "png",1079 # "quality": "high",1080 # "revised_prompt": "A fluffy, fuzzy cat sitting calmly, with soft fur, bright "1081 # "eyes, and a cute, friendly expression. The background is "1082 # "simple and light to emphasize the cat's texture and "1083 # "fluffiness.",1084 # "size": "1024x1024",1085 # "status": "completed",1086 # "type": "image_generation_call",1087 # "result": # base64 encode image data1088 # }10891090 expected_keys = {1091 "id",1092 "index",1093 "background",1094 "output_format",1095 "quality",1096 "result",1097 "revised_prompt",1098 "size",1099 "status",1100 "type",1101 }11021103 full: BaseMessageChunk | None = None1104 for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]):1105 assert isinstance(chunk, AIMessageChunk)1106 full = chunk if full is None else full + chunk1107 complete_ai_message = cast(AIMessageChunk, full)1108 # At the moment, the streaming API does not pick up annotations fully.1109 # So the following check is commented out.1110 # _check_response(complete_ai_message)1111 if output_version == "v0":1112 assert complete_ai_message.additional_kwargs["tool_outputs"]1113 tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0]1114 assert set(tool_output.keys()).issubset(expected_keys)1115 else:1116 # "responses/v1"1117 tool_output = next(1118 block1119 for block in complete_ai_message.content1120 if isinstance(block, dict) and block["type"] == "image_generation_call"1121 )1122 assert set(tool_output.keys()).issubset(expected_keys)112311241125@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz")1126@pytest.mark.vcr1127def test_image_generation_streaming_v1() -> None:1128 """Test image generation streaming."""1129 llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True, output_version="v1")1130 tool = {1131 "type": "image_generation",1132 "quality": "low",1133 "output_format": "jpeg",1134 "output_compression": 100,1135 "size": "1024x1024",1136 }11371138 standard_keys = {"type", "base64", "mime_type", "id", "index"}1139 extra_keys = {1140 "background",1141 "output_format",1142 "quality",1143 "revised_prompt",1144 "size",1145 "status",1146 }11471148 full: BaseMessageChunk | None = None1149 for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]):1150 assert isinstance(chunk, AIMessageChunk)1151 full = chunk if full is None else full + chunk1152 complete_ai_message = cast(AIMessageChunk, full)11531154 tool_output = next(1155 block1156 for block in complete_ai_message.content1157 if isinstance(block, dict) and block["type"] == "image"1158 )1159 assert set(standard_keys).issubset(tool_output.keys())1160 assert set(extra_keys).issubset(tool_output["extras"].keys())116111621163@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz")1164@pytest.mark.vcr1165@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])1166def test_image_generation_multi_turn(1167 output_version: Literal["v0", "responses/v1"],1168) -> None:1169 """Test multi-turn editing of image generation by passing in history."""1170 # Test multi-turn1171 llm = ChatOpenAI(1172 model="gpt-4.1", use_responses_api=True, output_version=output_version1173 )1174 # Test invocation1175 tool = {1176 "type": "image_generation",1177 # For testing purposes let's keep the quality low, so the test runs faster.1178 "quality": "low",1179 "output_format": "jpeg",1180 "output_compression": 100,1181 "size": "1024x1024",1182 }1183 llm_with_tools = llm.bind_tools([tool])11841185 chat_history: list[MessageLikeRepresentation] = [1186 {"role": "user", "content": "Draw a random short word in green font."}1187 ]1188 ai_message = llm_with_tools.invoke(chat_history)1189 assert isinstance(ai_message, AIMessage)1190 _check_response(ai_message)11911192 expected_keys = {1193 "id",1194 "background",1195 "output_format",1196 "quality",1197 "result",1198 "revised_prompt",1199 "size",1200 "status",1201 "type",1202 }12031204 if output_version == "v0":1205 tool_output = ai_message.additional_kwargs["tool_outputs"][0]1206 assert set(tool_output.keys()).issubset(expected_keys)1207 elif output_version == "responses/v1":1208 tool_output = next(1209 block1210 for block in ai_message.content1211 if isinstance(block, dict) and block["type"] == "image_generation_call"1212 )1213 assert set(tool_output.keys()).issubset(expected_keys)1214 else:1215 standard_keys = {"type", "base64", "id", "status"}1216 tool_output = next(1217 block1218 for block in ai_message.content1219 if isinstance(block, dict) and block["type"] == "image"1220 )1221 assert set(standard_keys).issubset(tool_output.keys())12221223 # Example tool output for an image (v0)1224 # {1225 # "background": "opaque",1226 # "id": "ig_683716a8ddf0819888572b20621c7ae4029ec8c11f8dacf8",1227 # "output_format": "png",1228 # "quality": "high",1229 # "revised_prompt": "A fluffy, fuzzy cat sitting calmly, with soft fur, bright "1230 # "eyes, and a cute, friendly expression. The background is "1231 # "simple and light to emphasize the cat's texture and "1232 # "fluffiness.",1233 # "size": "1024x1024",1234 # "status": "completed",1235 # "type": "image_generation_call",1236 # "result": # base64 encode image data1237 # }12381239 chat_history.extend(1240 [1241 # AI message with tool output1242 ai_message,1243 # New request1244 {1245 "role": "user",1246 "content": (1247 "Now, change the font to blue. Keep the word and everything else "1248 "the same."1249 ),1250 },1251 ]1252 )12531254 ai_message2 = llm_with_tools.invoke(chat_history)1255 assert isinstance(ai_message2, AIMessage)1256 _check_response(ai_message2)12571258 if output_version == "v0":1259 tool_output = ai_message2.additional_kwargs["tool_outputs"][0]1260 assert set(tool_output.keys()).issubset(expected_keys)1261 else:1262 # "responses/v1"1263 tool_output = next(1264 block1265 for block in ai_message2.content1266 if isinstance(block, dict) and block["type"] == "image_generation_call"1267 )1268 assert set(tool_output.keys()).issubset(expected_keys)126912701271@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz")1272@pytest.mark.vcr1273def test_image_generation_multi_turn_v1() -> None:1274 """Test multi-turn editing of image generation by passing in history."""1275 # Test multi-turn1276 llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True, output_version="v1")1277 # Test invocation1278 tool = {1279 "type": "image_generation",1280 "quality": "low",1281 "output_format": "jpeg",1282 "output_compression": 100,1283 "size": "1024x1024",1284 }1285 llm_with_tools = llm.bind_tools([tool])12861287 chat_history: list[MessageLikeRepresentation] = [1288 {"role": "user", "content": "Draw a random short word in green font."}1289 ]1290 ai_message = llm_with_tools.invoke(chat_history)1291 assert isinstance(ai_message, AIMessage)1292 _check_response(ai_message)12931294 standard_keys = {"type", "base64", "mime_type", "id"}1295 extra_keys = {1296 "background",1297 "output_format",1298 "quality",1299 "revised_prompt",1300 "size",1301 "status",1302 }13031304 tool_output = next(1305 block1306 for block in ai_message.content1307 if isinstance(block, dict) and block["type"] == "image"1308 )1309 assert set(standard_keys).issubset(tool_output.keys())1310 assert set(extra_keys).issubset(tool_output["extras"].keys())13111312 chat_history.extend(1313 [1314 # AI message with tool output1315 ai_message,1316 # New request1317 {1318 "role": "user",1319 "content": (1320 "Now, change the font to blue. Keep the word and everything else "1321 "the same."1322 ),1323 },1324 ]1325 )13261327 ai_message2 = llm_with_tools.invoke(chat_history)1328 assert isinstance(ai_message2, AIMessage)1329 _check_response(ai_message2)13301331 tool_output = next(1332 block1333 for block in ai_message2.content1334 if isinstance(block, dict) and block["type"] == "image"1335 )1336 assert set(standard_keys).issubset(tool_output.keys())1337 assert set(extra_keys).issubset(tool_output["extras"].keys())133813391340def test_verbosity_parameter() -> None:1341 """Test verbosity parameter with Responses API.13421343 Tests that the verbosity parameter works correctly with the OpenAI Responses API.13441345 """1346 llm = ChatOpenAI(model=MODEL_NAME, verbosity="medium", use_responses_api=True)1347 response = llm.invoke([HumanMessage(content="Hello, explain quantum computing.")])13481349 assert isinstance(response, AIMessage)1350 assert response.content135113521353@pytest.mark.default_cassette("test_custom_tool.yaml.gz")1354@pytest.mark.vcr1355@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1356def test_custom_tool(output_version: Literal["responses/v1", "v1"]) -> None:1357 @custom_tool1358 def execute_code(code: str) -> str:1359 """Execute python code."""1360 return "27"13611362 llm = ChatOpenAI(model="gpt-5", output_version=output_version).bind_tools(1363 [execute_code]1364 )13651366 input_message = {"role": "user", "content": "Use the tool to evaluate 3^3."}1367 tool_call_message = llm.invoke([input_message])1368 assert isinstance(tool_call_message, AIMessage)1369 assert len(tool_call_message.tool_calls) == 11370 tool_call = tool_call_message.tool_calls[0]1371 tool_message = execute_code.invoke(tool_call)1372 response = llm.invoke([input_message, tool_call_message, tool_message])1373 assert isinstance(response, AIMessage)13741375 # Test streaming1376 full: BaseMessageChunk | None = None1377 for chunk in llm.stream([input_message]):1378 assert isinstance(chunk, AIMessageChunk)1379 full = chunk if full is None else full + chunk1380 assert isinstance(full, AIMessageChunk)1381 assert len(full.tool_calls) == 1138213831384@pytest.mark.default_cassette("test_compaction.yaml.gz")1385@pytest.mark.vcr1386@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1387def test_compaction(output_version: Literal["responses/v1", "v1"]) -> None:1388 """Test the compaction beta feature."""1389 llm = ChatOpenAI(1390 model="gpt-5.2",1391 context_management=[{"type": "compaction", "compact_threshold": 10_000}],1392 output_version=output_version,1393 )13941395 input_message = {1396 "role": "user",1397 "content": f"Generate a one-sentence summary of this:\n\n{'a' * 50000}",1398 }1399 messages: list = [input_message]14001401 first_response = llm.invoke(messages)1402 messages.append(first_response)14031404 second_message = {1405 "role": "user",1406 "content": f"Generate a one-sentence summary of this:\n\n{'b' * 50000}",1407 }1408 messages.append(second_message)14091410 second_response = llm.invoke(messages)1411 messages.append(second_response)14121413 content_blocks = second_response.content_blocks1414 compaction_block = next(1415 (block for block in content_blocks if block["type"] == "non_standard"),1416 None,1417 )1418 assert compaction_block1419 assert compaction_block["value"].get("type") == "compaction"14201421 third_message = {1422 "role": "user",1423 "content": "What are we talking about?",1424 }1425 messages.append(third_message)1426 third_response = llm.invoke(messages)1427 assert third_response.text142814291430@pytest.mark.default_cassette("test_compaction_streaming.yaml.gz")1431@pytest.mark.vcr1432@pytest.mark.parametrize(1433 ("output_version", "use_v2_stream"),1434 [1435 ("responses/v1", False),1436 ("v1", False),1437 ("v1", True),1438 ],1439)1440def test_compaction_streaming(1441 output_version: Literal["responses/v1", "v1"], use_v2_stream: bool1442) -> None:1443 """Test the compaction beta feature."""1444 llm = ChatOpenAI(1445 model="gpt-5.2",1446 context_management=[{"type": "compaction", "compact_threshold": 10_000}],1447 output_version=output_version,1448 streaming=True,1449 )14501451 def _run(messages: list) -> AIMessage:1452 if use_v2_stream:1453 return llm.stream_events(messages, version="v3").output1454 result = llm.invoke(messages)1455 assert isinstance(result, AIMessage)1456 return result14571458 input_message = {1459 "role": "user",1460 "content": f"Generate a one-sentence summary of this:\n\n{'a' * 50000}",1461 }1462 messages: list = [input_message]14631464 first_response = _run(messages)1465 messages.append(first_response)14661467 second_message = {1468 "role": "user",1469 "content": f"Generate a one-sentence summary of this:\n\n{'b' * 50000}",1470 }1471 messages.append(second_message)14721473 second_response = _run(messages)1474 messages.append(second_response)14751476 content_blocks = second_response.content_blocks1477 compaction_block = next(1478 (block for block in content_blocks if block["type"] == "non_standard"),1479 None,1480 )1481 assert compaction_block1482 assert compaction_block["value"].get("type") == "compaction"14831484 third_message = {1485 "role": "user",1486 "content": "What are we talking about?",1487 }1488 messages.append(third_message)1489 third_response = _run(messages)1490 assert third_response.text149114921493def test_csv_input() -> None:1494 """Test CSV file input with both LangChain standard and OpenAI native formats."""1495 # Create sample CSV content1496 csv_content = (1497 "name,age,city\nAlice,30,New York\nBob,25,Los Angeles\nCarol,35,Chicago"1498 )1499 csv_bytes = csv_content.encode("utf-8")1500 base64_string = base64.b64encode(csv_bytes).decode("utf-8")15011502 llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)15031504 # Test LangChain standard format1505 langchain_message = {1506 "role": "user",1507 "content": [1508 {1509 "type": "text",1510 "text": "How many people are in this CSV file?",1511 },1512 {1513 "type": "file",1514 "base64": base64_string,1515 "mime_type": "text/csv",1516 "filename": "people.csv",1517 },1518 ],1519 }1520 payload = llm._get_request_payload([langchain_message])1521 block = payload["input"][0]["content"][1]1522 assert block["type"] == "input_file"15231524 response = llm.invoke([langchain_message])1525 assert isinstance(response, AIMessage)1526 assert response.content1527 assert (1528 "3" in str(response.content).lower() or "three" in str(response.content).lower()1529 )15301531 # Test OpenAI native format1532 openai_message = {1533 "role": "user",1534 "content": [1535 {1536 "type": "text",1537 "text": "How many people are in this CSV file?",1538 },1539 {1540 "type": "input_file",1541 "filename": "people.csv",1542 "file_data": f"data:text/csv;base64,{base64_string}",1543 },1544 ],1545 }1546 payload2 = llm._get_request_payload([openai_message])1547 block2 = payload2["input"][0]["content"][1]1548 assert block2["type"] == "input_file"15491550 response2 = llm.invoke([openai_message])1551 assert isinstance(response2, AIMessage)1552 assert response2.content1553 assert (1554 "3" in str(response2.content).lower()1555 or "three" in str(response2.content).lower()1556 )155715581559@pytest.mark.default_cassette("test_phase.yaml.gz")1560@pytest.mark.vcr1561@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1562def test_phase(output_version: str) -> None:1563 def get_weather(location: str) -> str:1564 """Get the weather at a location."""1565 return "It's sunny."15661567 model = ChatOpenAI(1568 model="gpt-5.4",1569 use_responses_api=True,1570 verbosity="high",1571 reasoning={"effort": "medium", "summary": "auto"},1572 output_version=output_version,1573 )15741575 agent = create_agent(model, tools=[get_weather])15761577 input_message = {1578 "role": "user",1579 "content": (1580 "What's the weather in the oldest major city in the US? State your answer "1581 "and then generate a tool call this turn."1582 ),1583 }1584 result = agent.invoke({"messages": [input_message]})1585 first_response = result["messages"][1]1586 text_block = next(1587 block for block in first_response.content if block["type"] == "text"1588 )1589 assert text_block["phase"] == "commentary"15901591 final_response = result["messages"][-1]1592 text_block = next(1593 block for block in final_response.content if block["type"] == "text"1594 )1595 assert text_block["phase"] == "final_answer"159615971598@pytest.mark.default_cassette("test_phase_streaming.yaml.gz")1599@pytest.mark.vcr1600@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1601def test_phase_streaming(output_version: str) -> None:1602 def get_weather(location: str) -> str:1603 """Get the weather at a location."""1604 return "It's sunny."16051606 model = ChatOpenAI(1607 model="gpt-5.4",1608 use_responses_api=True,1609 verbosity="high",1610 reasoning={"effort": "medium", "summary": "auto"},1611 streaming=True,1612 output_version=output_version,1613 )16141615 agent = create_agent(model, tools=[get_weather])16161617 input_message = {1618 "role": "user",1619 "content": (1620 "What's the weather in the oldest major city in the US? State your answer "1621 "and then generate a tool call this turn."1622 ),1623 }1624 result = agent.invoke({"messages": [input_message]})1625 first_response = result["messages"][1]1626 if output_version == "responses/v1":1627 assert [block["type"] for block in first_response.content] == [1628 "reasoning",1629 "text",1630 "function_call",1631 ]1632 else:1633 assert [block["type"] for block in first_response.content] == [1634 "reasoning",1635 "text",1636 "tool_call",1637 ]1638 text_block = next(1639 block for block in first_response.content if block["type"] == "text"1640 )1641 assert text_block["phase"] == "commentary"16421643 final_response = result["messages"][-1]1644 assert [block["type"] for block in final_response.content] == ["text"]1645 text_block = next(1646 block for block in final_response.content if block["type"] == "text"1647 )1648 assert text_block["phase"] == "final_answer"164916501651@pytest.mark.default_cassette("test_tool_search.yaml.gz")1652@pytest.mark.vcr1653@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1654def test_tool_search(output_version: str) -> None:1655 @tool(extras={"defer_loading": True})1656 def get_weather(location: str) -> str:1657 """Get the current weather for a location."""1658 return f"The weather in {location} is sunny and 72°F"16591660 @tool(extras={"defer_loading": True})1661 def get_recipe(query: str) -> None:1662 """Get a recipe for chicken soup."""16631664 model = ChatOpenAI(1665 model="gpt-5.4",1666 use_responses_api=True,1667 output_version=output_version,1668 )16691670 agent = create_agent(1671 model=model,1672 tools=[get_weather, get_recipe, {"type": "tool_search"}],1673 )1674 input_message = {"role": "user", "content": "What's the weather in San Francisco?"}1675 result = agent.invoke({"messages": [input_message]})1676 assert len(result["messages"]) == 41677 tool_call_message = result["messages"][1]1678 assert isinstance(tool_call_message, AIMessage)1679 assert tool_call_message.tool_calls1680 if output_version == "v1":1681 assert [block["type"] for block in tool_call_message.content] == [ # type: ignore[index]1682 "server_tool_call",1683 "server_tool_result",1684 "tool_call",1685 ]1686 else:1687 assert [block["type"] for block in tool_call_message.content] == [ # type: ignore[index]1688 "tool_search_call",1689 "tool_search_output",1690 "function_call",1691 ]16921693 assert isinstance(result["messages"][2], ToolMessage)16941695 assert result["messages"][3].text169616971698@pytest.mark.default_cassette("test_tool_search_streaming.yaml.gz")1699@pytest.mark.vcr1700@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])1701def test_tool_search_streaming(output_version: str) -> None:1702 @tool(extras={"defer_loading": True})1703 def get_weather(location: str) -> str:1704 """Get the current weather for a location."""1705 return f"The weather in {location} is sunny and 72°F"17061707 @tool(extras={"defer_loading": True})1708 def get_recipe(query: str) -> None:1709 """Get a recipe for chicken soup."""17101711 model = ChatOpenAI(1712 model="gpt-5.4",1713 use_responses_api=True,1714 streaming=True,1715 output_version=output_version,1716 )17171718 agent = create_agent(1719 model=model,1720 tools=[get_weather, get_recipe, {"type": "tool_search"}],1721 )1722 input_message = {"role": "user", "content": "What's the weather in San Francisco?"}1723 result = agent.invoke({"messages": [input_message]})1724 assert len(result["messages"]) == 41725 tool_call_message = result["messages"][1]1726 assert isinstance(tool_call_message, AIMessage)1727 assert tool_call_message.tool_calls1728 if output_version == "v1":1729 assert [block["type"] for block in tool_call_message.content] == [ # type: ignore[index]1730 "server_tool_call",1731 "server_tool_result",1732 "tool_call",1733 ]1734 else:1735 assert [block["type"] for block in tool_call_message.content] == [ # type: ignore[index]1736 "tool_search_call",1737 "tool_search_output",1738 "function_call",1739 ]17401741 assert isinstance(result["messages"][2], ToolMessage)17421743 assert result["messages"][3].text174417451746@pytest.mark.vcr1747def test_client_executed_tool_search() -> None:1748 @tool1749 def get_weather(location: str) -> str:1750 """Get the current weather for a location."""1751 return f"The weather in {location} is sunny and 72°F"17521753 def search_tools(goal: str) -> list[dict]:1754 """Search for available tools to help answer the question."""1755 return [1756 {1757 "type": "function",1758 "defer_loading": True,1759 **convert_to_openai_tool(get_weather)["function"],1760 }1761 ]17621763 tool_search_schema = convert_to_openai_tool(search_tools, strict=True)1764 tool_search_config: dict = {1765 "type": "tool_search",1766 "execution": "client",1767 "description": tool_search_schema["function"]["description"],1768 "parameters": tool_search_schema["function"]["parameters"],1769 }17701771 class ClientToolSearchMiddleware(AgentMiddleware):1772 @hook_config(can_jump_to=["model"])1773 def after_model(self, state: AgentState, runtime: Any) -> dict[str, Any] | None:1774 last_message = state["messages"][-1]1775 if not isinstance(last_message, AIMessage):1776 return None1777 for block in last_message.content:1778 if isinstance(block, dict) and block.get("type") == "tool_search_call":1779 call_id = block.get("call_id")1780 args = block.get("arguments", {})1781 goal = args.get("goal", "") if isinstance(args, dict) else ""1782 loaded_tools = search_tools(goal)1783 tool_search_output = {1784 "type": "tool_search_output",1785 "execution": "client",1786 "call_id": call_id,1787 "status": "completed",1788 "tools": loaded_tools,1789 }1790 return {1791 "messages": [HumanMessage(content=[tool_search_output])],1792 "jump_to": "model",1793 }1794 return None17951796 def wrap_tool_call(1797 self,1798 request: ToolCallRequest,1799 handler: Any,1800 ) -> Any:1801 if request.tool_call["name"] == "get_weather":1802 return handler(request.override(tool=get_weather))1803 return handler(request)18041805 llm = ChatOpenAI(model="gpt-5.4", use_responses_api=True)18061807 agent = create_agent(1808 model=llm,1809 tools=[tool_search_config],1810 middleware=[ClientToolSearchMiddleware()],1811 )18121813 result = agent.invoke(1814 {"messages": [HumanMessage("What's the weather in San Francisco?")]}1815 )1816 messages = result["messages"]1817 search_tool_call = messages[1]1818 assert search_tool_call.content[0]["type"] == "tool_search_call"18191820 search_tool_output = messages[2]1821 assert search_tool_output.content[0]["type"] == "tool_search_output"18221823 tool_call = messages[3]1824 assert tool_call.tool_calls18251826 assert isinstance(messages[4], ToolMessage)18271828 assert messages[5].text182918301831@pytest.mark.default_cassette("test_reasoning_text_v1_v2_parity.yaml.gz")1832@pytest.mark.vcr1833def test_reasoning_text_v1_v2_parity() -> None:1834 """`stream()` and `stream_events(version="v3")` agree on reasoning + text.18351836 Exercises the non-tool-call branch of the parity claim: a reasoning1837 model (`gpt-5-nano` via the Responses API) produces one or more1838 `reasoning` blocks followed by a `text` block. Both paths replay the1839 same recorded HTTP response (cassette with `allow_playback_repeats`),1840 so any remaining divergence is a library issue.1841 """1842 llm = ChatOpenAI(1843 model="gpt-5-nano",1844 reasoning={"effort": "low", "summary": "auto"},1845 output_version="v1",1846 )1847 prompt = {"role": "user", "content": "What is the capital of France?"}18481849 v1: AIMessageChunk | None = None1850 for chunk in llm.stream([prompt]):1851 assert isinstance(chunk, AIMessageChunk)1852 v1 = chunk if v1 is None else v1 + chunk1853 assert isinstance(v1, AIMessageChunk)18541855 stream = llm.stream_events([prompt], version="v3")1856 events = list(stream)1857 assert_valid_event_stream(events)1858 v2 = stream.output1859 assert isinstance(v2, AIMessage)18601861 # No tool calls on either path.1862 assert v1.tool_calls == v2.tool_calls == []1863 assert v1.invalid_tool_calls == v2.invalid_tool_calls == []1864 assert v1.additional_kwargs == v2.additional_kwargs18651866 # Content structure must match: same block sequence, same accumulated1867 # text and reasoning payloads, same block identifiers. `content_blocks`1868 # is the v1-shaped projection and is canonical for both paths.1869 assert v1.content_blocks == v2.content_blocks1870 assert v1.content == v2.content1871 # Sanity-check that we actually exercised the reasoning + text path.1872 block_types = [b["type"] for b in v1.content_blocks]1873 assert "reasoning" in block_types1874 assert "text" in block_types18751876 # Usage: core counts must match; provider detail subdicts are1877 # dropped by `_to_protocol_usage` because `langchain_protocol.UsageInfo`1878 # doesn't list them. Tracked as a protocol-repo change.1879 detail_keys = {"input_token_details", "output_token_details"}1880 v1_usage = {1881 k: v for k, v in (v1.usage_metadata or {}).items() if k not in detail_keys1882 }1883 v2_usage = {1884 k: v for k, v in (v2.usage_metadata or {}).items() if k not in detail_keys1885 }1886 assert v1_usage == v2_usage18871888 # Response metadata must match. The Responses API doesn't put1889 # `finish_reason` in per-chunk metadata, so neither the v1 reduction1890 # nor the v2 bridge ends up with one. (Protocol 0.0.10 dropped the1891 # v2 bridge's default `"stop"` synthesis; provider metadata now1892 # passes through unchanged.)1893 assert v1.response_metadata == v2.response_metadata
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.