Ensure functions have docstrings for documentation
def is_openai_data_block(
import re
from collections.abc import Sequence
from typing import (
    TYPE_CHECKING,
    Any,
    Literal,
    TypedDict,
    TypeVar,
)

if TYPE_CHECKING:
    from langchain_core.messages import BaseMessage
from langchain_core.messages.content import (
    ContentBlock,
)

# Compiled once at import time; `_parse_data_uri` may be called per content block.
_DATA_URI_PATTERN = re.compile(r"^data:(?P<mime_type>[^;]+);base64,(?P<data>.+)$")


def _filter_invocation_params_for_tracing(params: dict[str, Any]) -> dict[str, Any]:
    """Filter out large/inappropriate fields from invocation params for tracing.

    Removes fields like tools, functions, messages, response_format that can be large.

    Args:
        params: The invocation parameters to filter.

    Returns:
        The filtered parameters with large fields removed. A new dict is returned;
        `params` itself is not modified.
    """
    excluded_keys = {"tools", "functions", "messages", "response_format"}
    return {k: v for k, v in params.items() if k not in excluded_keys}


def is_openai_data_block(
    block: dict, filter_: Literal["image", "audio", "file"] | None = None
) -> bool:
    """Check whether a block contains multimodal data in OpenAI Chat Completions format.

    Supports both data and ID-style blocks (e.g. `'file_data'` and `'file_id'`).

    For `input_audio` and `file` blocks, additional keys are ignored and do not
    affect the outcome as long as the required keys are present and valid.
    `image_url` blocks are checked more strictly: the block may only contain the
    top-level keys `'type'`, `'image_url'` and `'detail'`; any other top-level key
    makes this function return `False`.

    Args:
        block: The content block to check.
        filter_: If provided, only return True for blocks matching this specific type.
            - "image": Only match image_url blocks
            - "audio": Only match input_audio blocks
            - "file": Only match file blocks
            If `None`, match any valid OpenAI data block type. Note that this means that
            if the block has a valid OpenAI data type but the filter_ is set to a
            different type, this function will return False.

    Returns:
        `True` if the block is a valid OpenAI data block and matches the filter_
        (if provided).

    """
    block_type = block.get("type")

    if block_type == "image_url":
        if filter_ is not None and filter_ != "image":
            return False
        if (
            (set(block.keys()) <= {"type", "image_url", "detail"})
            and (image_url := block.get("image_url"))
            and isinstance(image_url, dict)
        ):
            # `url` is required per OpenAI spec. `detail` is optional and specific
            # to OpenAI, so it is not validated here.
            url = image_url.get("url")
            if isinstance(url, str):
                return True

    elif block_type == "input_audio":
        if filter_ is not None and filter_ != "audio":
            return False
        if (audio := block.get("input_audio")) and isinstance(audio, dict):
            audio_data = audio.get("data")
            audio_format = audio.get("format")
            # Both required per OpenAI spec
            if isinstance(audio_data, str) and isinstance(audio_format, str):
                return True

    elif block_type == "file":
        if filter_ is not None and filter_ != "file":
            return False
        if (file := block.get("file")) and isinstance(file, dict):
            file_data = file.get("file_data")
            file_id = file.get("file_id")
            # Files can be either base64-encoded or pre-uploaded with an ID
            if isinstance(file_data, str) or isinstance(file_id, str):
                return True

    else:
        # Missing `'type'` key, or a type that is not an OpenAI data block type
        return False

    # Recognized type, but the required payload is missing or malformed
    return False


class ParsedDataUri(TypedDict):
    """Components of a base64 data URI, as returned by `_parse_data_uri`."""

    # Always "base64": the parser only accepts `;base64,` data URIs.
    source_type: Literal["base64"]
    # The payload that follows `;base64,` in the URI (not decoded).
    data: str
    # The MIME type found between `data:` and `;base64,`.
    mime_type: str


def _parse_data_uri(uri: str) -> ParsedDataUri | None:
    """Parse a data URI into its components.

    If parsing fails, return `None`. If either MIME type or data is missing, return
    `None`.

    Example:
        ```python
        data_uri = "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
        parsed = _parse_data_uri(data_uri)

        assert parsed == {
            "source_type": "base64",
            "mime_type": "image/jpeg",
            "data": "/9j/4AAQSkZJRg...",
        }
        ```

    Args:
        uri: The string to parse, expected in the form
            `data:<mime_type>;base64,<data>`.

    Returns:
        The parsed components, or `None` if `uri` does not have that form.
    """
    match = _DATA_URI_PATTERN.match(uri)
    if match is None:
        return None

    # Both groups use `+`, so a successful match guarantees that the MIME type and
    # the data are non-empty; no separate emptiness check is needed.
    return {
        "source_type": "base64",
        "data": match.group("data"),
        "mime_type": match.group("mime_type"),
    }


def _normalize_messages(
    messages: Sequence["BaseMessage"],
) -> list["BaseMessage"]:
    """Normalize message formats to LangChain v1 standard content blocks.

    Chat models already implement support for:
    - Images in OpenAI Chat Completions format
        These will be passed through unchanged
    - LangChain v1 standard content blocks

    This function extends support to:
    - [Audio](https://platform.openai.com/docs/api-reference/chat/create) and
        [file](https://platform.openai.com/docs/api-reference/files) data in OpenAI
        Chat Completions format
        - Images are technically supported but we expect chat models to handle them
            directly; this may change in the future
    - LangChain v0 standard content blocks for backward compatibility

    !!! warning "Behavior changed in `langchain-core` 1.0.0"

        In previous versions, this function returned messages in LangChain v0 format.
        Now, it returns messages in LangChain v1 format, which upgraded chat models now
        expect to receive when passing back in message history. For backward
        compatibility, this function will convert v0 message content to v1 format.

    ??? note "v0 Content Block Schemas"

        `URLContentBlock`:

        ```python
        {
            mime_type: NotRequired[str]
            type: Literal['image', 'audio', 'file'],
            source_type: Literal['url'],
            url: str,
        }
        ```

        `Base64ContentBlock`:

        ```python
        {
            mime_type: NotRequired[str]
            type: Literal['image', 'audio', 'file'],
            source_type: Literal['base64'],
            data: str,
        }
        ```

        `IDContentBlock`:

        (In practice, this was never used)

        ```python
        {
            type: Literal["image", "audio", "file"],
            source_type: Literal["id"],
            id: str,
        }
        ```

        `PlainTextContentBlock`:

        ```python
        {
            mime_type: NotRequired[str]
            type: Literal['file'],
            source_type: Literal['text'],
            url: str,
        }
        ```

    If a v1 message is passed in, it will be returned as-is, meaning it is safe to
    always pass in v1 messages to this function for assurance.

    For posterity, here are the OpenAI Chat Completions schemas we expect:

    Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types
    png, jpeg/jpg, webp, static gif:
    {
        "type": Literal['image_url'],
        "image_url": {
            "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"],
            "detail": Literal['low', 'high', 'auto'] = 'auto',  # Supported by OpenAI
        }
    }

    Chat Completions audio:
    {
        "type": Literal['input_audio'],
        "input_audio": {
            "format": Literal['wav', 'mp3'],
            "data": str = "$BASE64_ENCODED_AUDIO",
        },
    }

    Chat Completions files: either base64 or pre-uploaded file ID
    {
        "type": Literal['file'],
        "file": Union[
            {
                "filename": str | None = "$FILENAME",
                "file_data": str = "$BASE64_ENCODED_FILE",
            },
            {
                "file_id": str = "$FILE_ID",  # For pre-uploaded files to OpenAI
            },
        ],
    }

    Args:
        messages: The messages to normalize. They are never modified in place.

    Returns:
        A list with one entry per input message, in the same order. A message that
        needed no translation is returned as the same object; a message with at
        least one translated block is returned as a copy with an updated content
        list.

    """
    from langchain_core.messages.block_translators.langchain_v0 import (  # noqa: PLC0415
        _convert_legacy_v0_content_block_to_v1,
    )
    from langchain_core.messages.block_translators.openai import (  # noqa: PLC0415
        _convert_openai_format_to_data_block,
    )

    formatted_messages = []
    for message in messages:
        # We preserve input messages - the caller may reuse them elsewhere and expects
        # them to remain unchanged. We only create a copy if we need to translate.
        formatted_message = message

        if isinstance(message.content, list):
            for idx, block in enumerate(message.content):
                # Only dict blocks can be OpenAI or v0 data blocks; anything else
                # (e.g. plain strings) is passed through unchanged.
                if not isinstance(block, dict):
                    continue

                block_type = block.get("type")

                # OpenAI Chat Completions multimodal data blocks to v1 standard
                if (
                    block_type in {"input_audio", "file"}
                    # Discriminate between OpenAI/LC format since they share `'type'`
                    and is_openai_data_block(block)
                ):
                    formatted_message = _ensure_message_copy(message, formatted_message)

                    converted_block = _convert_openai_format_to_data_block(block)
                    _update_content_block(formatted_message, idx, converted_block)

                # Convert multimodal LangChain v0 to v1 standard content blocks
                elif (
                    block_type in {"image", "audio", "file"}
                    # v1 doesn't have `source_type`
                    and block.get("source_type") in {"url", "base64", "id", "text"}
                ):
                    formatted_message = _ensure_message_copy(message, formatted_message)

                    converted_block = _convert_legacy_v0_content_block_to_v1(block)
                    _update_content_block(formatted_message, idx, converted_block)

                # else, pass through blocks that look like they have v1 format unchanged

        formatted_messages.append(formatted_message)

    return formatted_messages


T = TypeVar("T", bound="BaseMessage")


def _ensure_message_copy(message: T, formatted_message: T) -> T:
    """Create a copy of the message if it hasn't been copied yet.

    Args:
        message: The original input message, which must not be mutated.
        formatted_message: The message currently being built; it is the same object
            as `message` until the first copy has been made.

    Returns:
        `formatted_message` unchanged if it is already a distinct object from
        `message`; otherwise a copy of `message` whose content is a new list.
    """
    if formatted_message is message:
        formatted_message = message.model_copy()
        # Shallow-copy content list to allow modifications
        formatted_message.content = list(formatted_message.content)
    return formatted_message


def _update_content_block(
    formatted_message: "BaseMessage", idx: int, new_block: ContentBlock | dict
) -> None:
    """Update a content block at the given index, handling type issues.

    Args:
        formatted_message: The message whose content list is modified in place.
        idx: Index of the block to replace within the content list.
        new_block: The block to store at `idx`.
    """
    # Type ignore needed because:
    # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]`
    # - When content is str, indexing fails (index error)
    # - When content is list, the items are `Union[str, dict]` but we're assigning
    #   `Union[ContentBlock, dict]` where ContentBlock is richer than dict
    # - This is safe because we only call this when we've verified content is a list and
    #   we're doing content block conversions
    formatted_message.content[idx] = new_block  # type: ignore[index, assignment]


def _update_message_content_to_blocks(message: T, output_version: str) -> T:
    """Return a copy of the message with its content replaced by its content blocks.

    The copy's `response_metadata` keeps every existing entry and additionally
    records `output_version` under the `"output_version"` key (overwriting any
    previous value for that key). The input message is not modified.

    Args:
        message: The message to copy.
        output_version: The value to store in `response_metadata["output_version"]`.

    Returns:
        A copy of `message` whose `content` is `message.content_blocks`.
    """
    return message.model_copy(
        update={
            "content": message.content_blocks,
            "response_metadata": {
                **message.response_metadata,
                "output_version": output_version,
            },
        }
    )
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.