libs/partners/fireworks/langchain_fireworks/chat_models.py · langchain-ai/langchain

1"""Fireworks chat wrapper."""23from __future__ import annotations45import contextlib6import json7import logging8from collections.abc import AsyncIterator, Callable, Iterator, Mapping, Sequence9from operator import itemgetter10from typing import (11    Any,12    Literal,13    NoReturn,14    TypeAlias,15    cast,16)1718import httpx19from fireworks import (20    APIConnectionError,21    AsyncFireworks,22    BadRequestError,23    Fireworks,24    FireworksError,25    InternalServerError,26    RateLimitError,27)28from langchain_core.callbacks import (29    AsyncCallbackManagerForLLMRun,30    CallbackManagerForLLMRun,31)32from langchain_core.exceptions import ContextOverflowError33from langchain_core.language_models import (34    LanguageModelInput,35    ModelProfile,36    ModelProfileRegistry,37)38from langchain_core.language_models.chat_models import (39    BaseChatModel,40    LangSmithParams,41    agenerate_from_stream,42    generate_from_stream,43)44from langchain_core.language_models.llms import create_base_retry_decorator45from langchain_core.messages import (46    AIMessage,47    AIMessageChunk,48    BaseMessage,49    BaseMessageChunk,50    ChatMessage,51    ChatMessageChunk,52    FunctionMessage,53    FunctionMessageChunk,54    HumanMessage,55    HumanMessageChunk,56    InvalidToolCall,57    SystemMessage,58    SystemMessageChunk,59    ToolCall,60    ToolMessage,61    ToolMessageChunk,62    UsageMetadata,63    is_data_content_block,64)65from langchain_core.messages.block_translators.openai import (66    convert_to_openai_data_block,67)68from langchain_core.messages.tool import (69    ToolCallChunk,70)71from langchain_core.messages.tool import (72    tool_call_chunk as create_tool_call_chunk,73)74from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser75from langchain_core.output_parsers.base import OutputParserLike76from langchain_core.output_parsers.openai_tools import (77    JsonOutputKeyToolsParser,78    PydanticToolsParser,79    make_invalid_tool_call,80    parse_tool_call,81)82from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult83from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough84from langchain_core.tools import BaseTool85from langchain_core.utils import (86    get_pydantic_field_names,87)88from langchain_core.utils._gateway import _apply_gateway_config89from langchain_core.utils.function_calling import (90    convert_to_json_schema,91    convert_to_openai_tool,92)93from langchain_core.utils.pydantic import is_basemodel_subclass94from langchain_core.utils.utils import _build_model_kwargs95from pydantic import (96    BaseModel,97    ConfigDict,98    Field,99    PrivateAttr,100    SecretStr,101    model_validator,102)103from typing_extensions import Self104105from langchain_fireworks._compat import _convert_from_v1_to_chat_completions106from langchain_fireworks._version import __version__107from langchain_fireworks.data._profiles import _PROFILES108109logger = logging.getLogger(__name__)110111112_MODEL_PROFILES = cast("ModelProfileRegistry", _PROFILES)113114115def _get_default_model_profile(model_name: str) -> ModelProfile:116    default = _MODEL_PROFILES.get(model_name) or {}117    return default.copy()118119120def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:121    """Convert a dictionary to a LangChain message.122123    Args:124        _dict: The dictionary.125126    Returns:127        The LangChain message.128129    """130    role = _dict.get("role")131    if role == "user":132        return HumanMessage(content=_dict.get("content", ""))133    if role == "assistant":134        # Fix for azure135        # Also Fireworks returns None for tool invocations136        content = _dict.get("content", "") or ""137        additional_kwargs: dict = {}138        if reasoning_content := _dict.get("reasoning_content"):139            additional_kwargs["reasoning_content"] = reasoning_content140141        if function_call := _dict.get("function_call"):142            additional_kwargs["function_call"] = dict(function_call)143144        tool_calls = []145        invalid_tool_calls = []146        if raw_tool_calls := _dict.get("tool_calls"):147            additional_kwargs["tool_calls"] = raw_tool_calls148            for raw_tool_call in raw_tool_calls:149                try:150                    tool_calls.append(parse_tool_call(raw_tool_call, return_id=True))151                except Exception as e:152                    invalid_tool_calls.append(153                        dict(make_invalid_tool_call(raw_tool_call, str(e)))154                    )155        return AIMessage(156            content=content,157            additional_kwargs=additional_kwargs,158            tool_calls=tool_calls,159            invalid_tool_calls=invalid_tool_calls,160        )161    if role == "system":162        return SystemMessage(content=_dict.get("content", ""))163    if role == "function":164        return FunctionMessage(165            content=_dict.get("content", ""), name=_dict.get("name", "")166        )167    if role == "tool":168        additional_kwargs = {}169        if "name" in _dict:170            additional_kwargs["name"] = _dict["name"]171        return ToolMessage(172            content=_dict.get("content", ""),173            tool_call_id=_dict.get("tool_call_id", ""),174            additional_kwargs=additional_kwargs,175        )176    return ChatMessage(content=_dict.get("content", ""), role=role or "")177178179def _allowed_content_part_keys() -> frozenset[str]:180    """Allowlist of wire-valid keys on a Fireworks content part.181182    Derived at import time from the stainless-generated TypedDict so the183    allowlist tracks the upstream OpenAPI spec as `fireworks-ai` is bumped:184    new fields widen the allowlist for free, removed/renamed fields shrink it185    in lockstep. If the SDK reshuffles its module layout the import falls back186    to a conservative hand-coded set and emits a warning, and the layout test187    (`test_fireworks_sdk_request_layout_stable`) fails to surface the drift.188    """189    try:190        from typing import get_type_hints191192        from fireworks.types.shared_params.chat_message import (193            ContentUnionMember1,194        )195196        return frozenset(get_type_hints(ContentUnionMember1))197    except ImportError:198        logger.warning(199            "Could not import `fireworks.types.shared_params.chat_message."200            "ContentUnionMember1`; falling back to a conservative content-part "201            "key allowlist. Bump `fireworks-ai` or update "202            "`_allowed_content_part_keys` if the SDK has moved this type.",203        )204        return frozenset({"type", "text", "image_url", "video_url"})205206207_ALLOWED_CONTENT_PART_KEYS: frozenset[str] = _allowed_content_part_keys()208209210def _sanitize_chat_completions_content(content: Any) -> Any:211    """Strip non-wire keys from content blocks before serializing to Fireworks.212213    Fireworks's chat completions endpoint rejects unknown fields on message214    content parts with `Extra inputs are not permitted, field: 'messages[N]215    .content.list[ChatMessageContent][i].<key>'`. This surfaces when a216    conversation accumulates AIMessages from a different provider (e.g.217    Anthropic's v1 streaming-reassembly `index` marker on text blocks, or the218    LangChain-internal `caller` key on `tool_use` blocks) and that history is219    later forwarded to a Fireworks-hosted model.220221    For list content:222        - each block dict is filtered down to keys in223            `_ALLOWED_CONTENT_PART_KEYS` (sourced from the SDK TypedDict, so it224            stays in sync with the upstream spec).225        - if the result is a list of exactly one block that, post-strip, is226            `{"type": "text", "text": <str>}` and nothing else, it is coerced to227            a plain string. Fireworks's `content` union lists `str` first228            (`Input should be a valid string, field: 'messages[N].content.str'`),229            and the stricter shape avoids the union-validation noise on the230            server side.231    Non-list content (strings, None) passes through unchanged.232    """233    if not isinstance(content, list):234        return content235    sanitized: list[Any] = []236    for block in content:237        if isinstance(block, dict):238            sanitized.append(239                {k: v for k, v in block.items() if k in _ALLOWED_CONTENT_PART_KEYS}240            )241        else:242            sanitized.append(block)243    if (244        len(sanitized) == 1245        and isinstance(sanitized[0], dict)246        and set(sanitized[0]) == {"type", "text"}247        and sanitized[0]["type"] == "text"248        and isinstance(sanitized[0]["text"], str)249    ):250        return sanitized[0]["text"]251    return sanitized252253254def _format_message_content(content: Any) -> Any:255    """Format message content for the Fireworks chat completions wire format.256257    Adapted from `langchain_openai.chat_models.base._format_message_content`,258    scoped to the chat completions API: drops content block types the wire259    format does not carry, translates canonical v0/v1 multimodal data blocks260    via `convert_to_openai_data_block(block, api="chat/completions")`, and261    converts legacy Anthropic-shape image blocks (`{"type": "image",262    "source": {...}}`) to OpenAI `image_url` blocks. String and non-list263    content are returned unchanged.264265    Args:266        content: The message content. Strings and non-list values are267            returned as-is; lists are walked block by block.268269    Returns:270        The formatted content, ready to be placed on the chat completions271        wire. List inputs return a new list with translations applied; other272        inputs are returned unchanged.273    """274    if not isinstance(content, list):275        return content276    formatted: list[Any] = []277    for block in content:278        if isinstance(block, dict) and "type" in block:279            btype = block["type"]280            if btype in (281                "tool_use",282                "thinking",283                "reasoning_content",284                "function_call",285                "code_interpreter_call",286            ):287                continue288            if is_data_content_block(block):289                formatted.append(290                    convert_to_openai_data_block(block, api="chat/completions")291                )292                continue293            if (294                btype == "image"295                and (source := block.get("source"))296                and isinstance(source, dict)297            ):298                if (299                    source.get("type") == "base64"300                    and (media_type := source.get("media_type"))301                    and (data := source.get("data"))302                ):303                    formatted.append(304                        {305                            "type": "image_url",306                            "image_url": {"url": f"data:{media_type};base64,{data}"},307                        }308                    )309                    continue310                if source.get("type") == "url" and (url := source.get("url")):311                    formatted.append({"type": "image_url", "image_url": {"url": url}})312                    continue313                continue314        formatted.append(block)315    return formatted316317318def _convert_message_to_dict(message: BaseMessage) -> dict:319    """Convert a LangChain message to a dictionary.320321    Args:322        message: The LangChain message.323324    Returns:325        The dictionary.326327    """328    message_dict: dict[str, Any]329    if isinstance(message, ChatMessage):330        message_dict = {331            "role": message.role,332            "content": _sanitize_chat_completions_content(333                _format_message_content(message.content)334            ),335        }336    elif isinstance(message, HumanMessage):337        message_dict = {338            "role": "user",339            "content": _sanitize_chat_completions_content(340                _format_message_content(message.content)341            ),342        }343    elif isinstance(message, AIMessage):344        # Translate v1 content345        if message.response_metadata.get("output_version") == "v1":346            message = _convert_from_v1_to_chat_completions(message)347        message_dict = {348            "role": "assistant",349            "content": _sanitize_chat_completions_content(350                _format_message_content(message.content)351            ),352        }353        if "function_call" in message.additional_kwargs:354            message_dict["function_call"] = message.additional_kwargs["function_call"]355            # If function call only, content is None not empty string356            if message_dict["content"] == "":357                message_dict["content"] = None358        if message.tool_calls or message.invalid_tool_calls:359            message_dict["tool_calls"] = [360                _lc_tool_call_to_fireworks_tool_call(tc) for tc in message.tool_calls361            ] + [362                _lc_invalid_tool_call_to_fireworks_tool_call(tc)363                for tc in message.invalid_tool_calls364            ]365        elif "tool_calls" in message.additional_kwargs:366            message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]367        # If tool calls only, content is None not empty string368        if "tool_calls" in message_dict and message_dict["content"] == "":369            message_dict["content"] = None370        else:371            pass372    elif isinstance(message, SystemMessage):373        message_dict = {374            "role": "system",375            "content": _sanitize_chat_completions_content(376                _format_message_content(message.content)377            ),378        }379    elif isinstance(message, FunctionMessage):380        message_dict = {381            "role": "function",382            "content": message.content,383            "name": message.name,384        }385    elif isinstance(message, ToolMessage):386        message_dict = {387            "role": "tool",388            "content": _sanitize_chat_completions_content(389                _format_message_content(message.content)390            ),391            "tool_call_id": message.tool_call_id,392        }393    else:394        msg = f"Got unknown type {message}"395        raise TypeError(msg)396    if "name" in message.additional_kwargs:397        message_dict["name"] = message.additional_kwargs["name"]398    return message_dict399400401def _usage_to_metadata(usage: Mapping[str, Any]) -> UsageMetadata:402    input_tokens = usage.get("prompt_tokens") or 0403    output_tokens = usage.get("completion_tokens") or 0404    usage_metadata: UsageMetadata = {405        "input_tokens": input_tokens,406        "output_tokens": output_tokens,407        "total_tokens": usage.get("total_tokens") or input_tokens + output_tokens,408    }409    cached_tokens = (usage.get("prompt_tokens_details") or {}).get("cached_tokens")410    if cached_tokens is not None:411        usage_metadata["input_token_details"] = {"cache_read": cached_tokens}412    return usage_metadata413414415TokenUsageTree: TypeAlias = "int | dict[str, TokenUsageTree]"416"""Raw provider token usage: a tree of `int` leaves and nested `dict` nodes417(e.g. `prompt_tokens_details`).418419Modeled as a recursive alias so the merge helper's signature carries the shape420rather than leaving it to `Any`.421"""422423424def _update_token_usage(425    overall_token_usage: TokenUsageTree, new_usage: TokenUsageTree426) -> TokenUsageTree:427    """Recursively merge raw provider token usage across generations.428429    Token usage is a tree of `int` leaves (summed) and `dict` nodes such as430    `prompt_tokens_details` (merged key-by-key, skipping `None` values).431432    A type mismatch between the accumulator and the incoming value (e.g. an433    `int` on one side and a `dict` on the other) indicates malformed provider434    data and is raised rather than silently coerced. An entirely unexpected435    leaf type (neither `int` nor `dict`) is logged and passed through, so a436    telemetry anomaly degrades gracefully instead of failing the response.437    """438    if isinstance(new_usage, int):439        if not isinstance(overall_token_usage, int):440            msg = (441                "Got different types for token usage: "442                f"{new_usage!r} ({type(new_usage).__name__}) and "443                f"{overall_token_usage!r} ({type(overall_token_usage).__name__})"444            )445            raise ValueError(msg)446        return overall_token_usage + new_usage447    if isinstance(new_usage, dict):448        if not isinstance(overall_token_usage, dict):449            msg = (450                "Got different types for token usage: "451                f"{new_usage!r} ({type(new_usage).__name__}) and "452                f"{overall_token_usage!r} ({type(overall_token_usage).__name__})"453            )454            raise ValueError(msg)455        updated_token_usage = dict(overall_token_usage)456        for key, value in new_usage.items():457            if value is not None:458                # Seed a first-seen key with an empty node of the same kind so a459                # nested `dict` value merges rather than colliding with an `int`.460                default: TokenUsageTree = {} if isinstance(value, dict) else 0461                updated_token_usage[key] = _update_token_usage(462                    overall_token_usage.get(key, default), value463                )464        return updated_token_usage465    logger.warning("Unexpected type for token usage: %s", type(new_usage).__name__)466    return new_usage467468469def _convert_chunk_to_message_chunk(470    chunk: Mapping[str, Any], default_class: type[BaseMessageChunk]471) -> BaseMessageChunk:472    choices = chunk.get("choices") or []473    response_metadata: dict[str, Any] = {"model_provider": "fireworks"}474    if service_tier := chunk.get("service_tier"):475        response_metadata["service_tier"] = service_tier476    if not choices:477        # Final chunk emitted when `stream_options.include_usage=True`:478        # `choices` is empty and the chunk carries only `usage`.479        usage = chunk.get("usage")480        if not usage:481            logger.debug(482                "Received stream chunk with no choices and no usage: %s", chunk483            )484        usage_metadata = _usage_to_metadata(usage) if usage else None485        return AIMessageChunk(486            content="",487            usage_metadata=usage_metadata,488            response_metadata=response_metadata,489        )490    choice = choices[0]491    _dict = choice["delta"]492    role = cast(str, _dict.get("role"))493    content = cast(str, _dict.get("content") or "")494    additional_kwargs: dict = {}495    tool_call_chunks: list[ToolCallChunk] = []496    if _dict.get("function_call"):497        function_call = dict(_dict["function_call"])498        if "name" in function_call and function_call["name"] is None:499            function_call["name"] = ""500        additional_kwargs["function_call"] = function_call501    if raw_tool_calls := _dict.get("tool_calls"):502        additional_kwargs["tool_calls"] = raw_tool_calls503        for rtc in raw_tool_calls:504            with contextlib.suppress(KeyError):505                tool_call_chunks.append(506                    create_tool_call_chunk(507                        name=rtc["function"].get("name"),508                        args=rtc["function"].get("arguments"),509                        id=rtc.get("id"),510                        index=rtc.get("index"),511                    )512                )513    if role == "user" or default_class == HumanMessageChunk:514        return HumanMessageChunk(content=content)515    if role == "assistant" or default_class == AIMessageChunk:516        usage = chunk.get("usage")517        usage_metadata = _usage_to_metadata(usage) if usage else None518        return AIMessageChunk(519            content=content,520            additional_kwargs=additional_kwargs,521            tool_call_chunks=tool_call_chunks,522            usage_metadata=usage_metadata,523            response_metadata=response_metadata,524        )525    if role == "system" or default_class == SystemMessageChunk:526        return SystemMessageChunk(content=content)527    if role == "function" or default_class == FunctionMessageChunk:528        return FunctionMessageChunk(content=content, name=_dict["name"])529    if role == "tool" or default_class == ToolMessageChunk:530        return ToolMessageChunk(content=content, tool_call_id=_dict["tool_call_id"])531    if role or default_class == ChatMessageChunk:532        return ChatMessageChunk(content=content, role=role)533    return default_class(content=content)  # type: ignore[call-arg]534535536class _RetryableHTTPStatusError(FireworksError):537    """Internal marker for 5xx `httpx.HTTPStatusError` responses.538539    The 1.x SDK wraps every status response into a typed `APIStatusError`540    subclass, so this path is defense-in-depth: it only fires when a raw541    `httpx.HTTPStatusError` escapes the SDK (e.g., a custom `http_client` or542    monkey-patched transport raises one directly). Promoting it here keeps the543    retryable set expressible as a list of classes for544    `create_base_retry_decorator`.545    """546547548_RETRYABLE_ERRORS: tuple[type[BaseException], ...] = (549    APIConnectionError,550    InternalServerError,551    RateLimitError,552    httpx.TimeoutException,553    httpx.TransportError,554    _RetryableHTTPStatusError,555)556557558def _promote_http_status_error(exc: httpx.HTTPStatusError) -> NoReturn:559    """Re-raise 5xx `httpx.HTTPStatusError` as a retryable marker."""560    if exc.response.status_code >= 500:561        msg = f"Retryable {exc.response.status_code} from Fireworks: {exc}"562        raise _RetryableHTTPStatusError(msg) from exc563    raise exc564565566class FireworksContextOverflowError(BadRequestError, ContextOverflowError):567    """`BadRequestError` raised when input exceeds Fireworks's context limit."""568569570def _handle_fireworks_invalid_request(e: BadRequestError) -> NoReturn:571    """Promote prompt-too-long errors to `FireworksContextOverflowError`."""572    if "prompt is too long" in str(e):573        raise FireworksContextOverflowError(574            str(e), response=e.response, body=e.body575        ) from e576    raise e577578579def _raise_empty_stream() -> NoReturn:580    """Raise a descriptive error when the SDK returns a zero-chunk stream."""581    msg = "Received empty stream from Fireworks"582    raise FireworksError(msg)583584585def _create_retry_decorator(586    llm: ChatFireworks,587    run_manager: AsyncCallbackManagerForLLMRun | CallbackManagerForLLMRun | None = None,588) -> Callable[[Any], Any]:589    """Return a tenacity retry decorator for Fireworks SDK calls.590591    Retries live here rather than in the SDK so each attempt is visible to the592    LangChain `run_manager.on_retry` callback. The SDK's own retry layer is593    suppressed via `max_retries=0` on the client; see `validate_environment`.594    """595    # `max_retries` counts retries *after* the initial attempt (default lives on596    # the `ChatFireworks.max_retries` field). `create_base_retry_decorator`597    # forwards its `max_retries` to `stop_after_attempt`, which counts total598    # attempts — so offset by 1. `None` and `0` both mean "single attempt, no599    # retries".600    attempts = (llm.max_retries + 1) if llm.max_retries else 1601    return create_base_retry_decorator(602        error_types=list(_RETRYABLE_ERRORS),603        max_retries=attempts,604        run_manager=run_manager,605    )606607608def _prepare_sdk_kwargs(kwargs: dict[str, Any]) -> dict[str, Any]:609    """Move fields the 1.x SDK does not model into `extra_body`.610611    The Stainless-generated `chat.completions.create` signature has a fixed set612    of typed parameters. Fireworks accepts additional fields on the wire (notably613    `stream_options.include_usage`) that the SDK schema does not declare. The614    SDK exposes `extra_body` precisely for this — merge anything that looks615    extra-body-shaped into it so it lands in the JSON request body.616617    If a caller supplies both `extra_body={"stream_options": ...}` and a618    top-level `stream_options=...`, the value already in `extra_body` wins619    (callers using `extra_body` are presumed to want explicit control); the620    discarded top-level value is logged.621    """622    extra_body = dict(kwargs.pop("extra_body", None) or {})623    top_level_stream_options = kwargs.pop("stream_options", None)624    if top_level_stream_options is not None:625        if "stream_options" in extra_body:626            logger.warning(627                "Both `extra_body['stream_options']` and a top-level "628                "`stream_options` were supplied; using `extra_body`'s value "629                "and discarding the top-level value.",630            )631        else:632            extra_body["stream_options"] = top_level_stream_options633    if extra_body:634        kwargs["extra_body"] = extra_body635    return kwargs636637638def _completion_with_retry(639    llm: ChatFireworks,640    run_manager: CallbackManagerForLLMRun | None = None,641    **kwargs: Any,642) -> Any:643    """Retry the sync completion call, including stream setup."""644    retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)645    kwargs = _prepare_sdk_kwargs(kwargs)646647    @retry_decorator648    def _call() -> Any:649        try:650            result = llm.client.create(**kwargs)651        except httpx.HTTPStatusError as e:652            _promote_http_status_error(e)653        if kwargs.get("stream"):654            # The streaming generator is lazy — advance once so the HTTP655            # connection and any transport error happen inside the retry656            # boundary. `_prepend_chunk` then re-yields the consumed chunk657            # ahead of the rest so callers still see every event.658            try:659                iterator = iter(result)660                first = next(iterator)661            except StopIteration:662                _raise_empty_stream()663            except httpx.HTTPStatusError as e:664                _promote_http_status_error(e)665            return _prepend_chunk(first, iterator)666        return result667668    return _call()669670671async def _acompletion_with_retry(672    llm: ChatFireworks,673    run_manager: AsyncCallbackManagerForLLMRun | None = None,674    **kwargs: Any,675) -> Any:676    """Retry the async completion call, including stream setup."""677    retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)678    kwargs = _prepare_sdk_kwargs(kwargs)679680    @retry_decorator681    async def _call() -> Any:682        if kwargs.get("stream"):683            try:684                # 1.x async `create()` is a coroutine that resolves to an685                # `AsyncStream` when `stream=True`. Await it, then advance the686                # async iterator once inside the retry boundary so transport687                # errors surface here rather than at first downstream consumer.688                result = await llm.async_client.create(**kwargs)689                agen = result.__aiter__()690                first = await agen.__anext__()691            except StopAsyncIteration:692                _raise_empty_stream()693            except httpx.HTTPStatusError as e:694                _promote_http_status_error(e)695            return _aprepend_chunk(first, agen)696        try:697            return await llm.async_client.create(**kwargs)698        except httpx.HTTPStatusError as e:699            _promote_http_status_error(e)700701    return await _call()702703704def _prepend_chunk(first: Any, rest: Iterator[Any]) -> Iterator[Any]:705    yield first706    yield from rest707708709async def _aprepend_chunk(first: Any, rest: AsyncIterator[Any]) -> AsyncIterator[Any]:710    yield first711    async for item in rest:712        yield item713714715class ChatFireworks(BaseChatModel):716    """`Fireworks` Chat large language models API.717718    To use, you should have the719    environment variable `FIREWORKS_API_KEY` set with your API key.720721    Any parameters that are valid to be passed to the fireworks.create call722    can be passed in, even if not explicitly saved on this class.723724    Example:725        ```python726        from langchain_fireworks.chat_models import ChatFireworks727728        model = ChatFireworks(model_name="accounts/fireworks/models/gpt-oss-120b")729        ```730731    Fireworks request headers can be passed with `extra_headers`. For prompt732    caching, `x-session-affinity` pins requests to a replica so related calls can733    reuse the same prompt-cache session:734735    ```python736    model.invoke(737        "Hello",738        extra_headers={"x-session-affinity": "user-42"},739    )740    ```741742    The Fireworks SDK also accepts a typed `prompt_cache_key` field (passed as a743    regular keyword argument), which it treats as the preferred alternative to744    the raw `x-session-affinity` header:745746    ```python747    model.invoke("Hello", prompt_cache_key="user-42")748    ```749    """750751    @property752    def lc_secrets(self) -> dict[str, str]:753        return {"fireworks_api_key": "FIREWORKS_API_KEY"}754755    @classmethod756    def get_lc_namespace(cls) -> list[str]:757        """Get the namespace of the LangChain object.758759        Returns:760            `["langchain", "chat_models", "fireworks"]`761        """762        return ["langchain", "chat_models", "fireworks"]763764    @property765    def lc_attributes(self) -> dict[str, Any]:766        attributes: dict[str, Any] = {}767        if self.fireworks_api_base:768            attributes["fireworks_api_base"] = self.fireworks_api_base769770        return attributes771772    @classmethod773    def is_lc_serializable(cls) -> bool:774        """Return whether this model can be serialized by LangChain."""775        return True776777    client: Any = Field(default=None, exclude=True)778    """Internal `fireworks.Fireworks().chat.completions` resource.779780    Constructed with `max_retries=0` so retries are owned by781    `_create_retry_decorator` (which surfaces each attempt to the LangChain782    `run_manager`). Callers reaching for this directly should set their own783    retry layer.784    """785786    async_client: Any = Field(default=None, exclude=True)787    """Internal `fireworks.AsyncFireworks().chat.completions` resource.788789    Constructed with `max_retries=0`; see `client`.790    """791792    _sdk_client: Any = PrivateAttr(default=None)793    """Owning `fireworks.Fireworks` instance, retained so `close()` can call794    into the underlying HTTPX client. The 1.x SDK does not expose lifecycle795    methods on the `chat.completions` resource itself.796    """797798    _async_sdk_client: Any = PrivateAttr(default=None)799    """Owning `fireworks.AsyncFireworks` instance; see `_sdk_client`."""800801    model_name: str = Field(alias="model")802    """Model name to use."""803804    @property805    def model(self) -> str:806        """Same as model_name."""807        return self.model_name808809    temperature: float | None = None810    """What sampling temperature to use."""811812    stop: str | list[str] | None = Field(default=None, alias="stop_sequences")813    """Default stop sequences."""814815    model_kwargs: dict[str, Any] = Field(default_factory=dict)816    """Holds any model parameters valid for `create` call not explicitly specified."""817818    fireworks_api_key: SecretStr = Field(default=SecretStr(""), alias="api_key")819    """Fireworks API key.820821    Automatically read from env variable `FIREWORKS_API_KEY` if not provided.822823    If `LANGSMITH_GATEWAY` is enabled and the base URL points at the gateway,824    `LANGSMITH_GATEWAY_API_KEY` is used instead.825    """826827    fireworks_api_base: str | None = Field(default=None, alias="base_url")828    """Base URL path for API requests, leave blank if not using a proxy or service829    emulator.830831    If `LANGSMITH_GATEWAY` is set, it is used as a fallback after `FIREWORKS_API_BASE`.832    """833834    request_timeout: float | tuple[float, float] | Any | None = Field(835        default=None, alias="timeout"836    )837    """Timeout for requests to Fireworks completion API. Can be `float`,838    `httpx.Timeout` or `None`.839    """840841    streaming: bool = False842    """Whether to stream the results or not."""843844    stream_usage: bool = True845    """Whether to include usage metadata in streaming output.846847    If `True`, a final empty-content chunk carrying `usage_metadata` is emitted848    during the stream. Set to `False` if the upstream model/proxy rejects849    `stream_options`, or pass `stream_options` explicitly via `model_kwargs` or850    a runtime kwarg to override.851852    !!! version-added "Added in `langchain-fireworks` 1.2.0"853854    !!! warning "Behavior changed in `langchain-fireworks` 1.2.0"855856        Streaming now opts into `stream_options.include_usage` by default, and857        the final empty-`choices` chunk is surfaced as an `AIMessageChunk` with858        `usage_metadata` instead of being silently dropped.859    """860861    n: int = 1862    """Number of chat completions to generate for each prompt."""863864    max_tokens: int | None = None865    """Maximum number of tokens to generate."""866867    max_retries: int | None = 2868    """Maximum number of retries after the initial attempt when generating.869870    Retries use exponential backoff and trigger on transient errors:871    `RateLimitError`, `APIConnectionError` (including its `APITimeoutError`872    subclass), 5xx responses (including those that surface as873    `httpx.HTTPStatusError` rather than typed SDK errors), and underlying874    transport errors (`httpx.TimeoutException`, `httpx.TransportError`).875    A value of `None` or `0` disables retries.876    """877878    service_tier: str | None = None879    """Service tier for the request.880881    Forwarded as the `service_tier` field on the Fireworks chat completions882    request when set. Pass `'priority'` to opt into Fireworks' priority tier;883    leave as `None` to use the default tier.884885    To use Fireworks' fast mode instead, select a fast-routed `model`; fast mode886    is not controlled by this field. See Fireworks'887    [serverless product docs](https://docs.fireworks.ai/guides/serverless-products)888    for the current list of fast routers and tiers.889890    !!! version-added "Added in `langchain-fireworks` 1.3.0"891    """892    reasoning_effort: str | None = None893    """Reasoning effort.894895    Forwarded as the `reasoning_effort` request field. Supported values vary by896    model; see the model's `profile.reasoning_effort_levels`.897898    Can also be passed at call time, e.g.899    `model.invoke(..., reasoning_effort="high")`.900    """901902    model_config = ConfigDict(903        populate_by_name=True,904    )905906    @model_validator(mode="before")907    @classmethod908    def build_extra(cls, values: dict[str, Any]) -> Any:909        """Build extra kwargs from additional params that were passed in."""910        all_required_field_names = get_pydantic_field_names(cls)911        return _build_model_kwargs(values, all_required_field_names)912913    @model_validator(mode="before")914    @classmethod915    def _resolve_gateway(cls, values: Any) -> Any:916        """Resolve the base URL and API key, applying LangSmith gateway settings.917918        An explicit ``base_url``/``api_key`` always wins. Otherwise the base URL919        falls back to ``FIREWORKS_API_BASE``, then the LangSmith gateway. The920        gateway key is preferred only when the base URL came from the gateway;921        for any other endpoint the provider key wins, and the gateway key is a922        candidate only when the gateway is enabled.923        """924        if isinstance(values, dict):925            config = _apply_gateway_config(926                values,927                cls,928                base_url_field="fireworks_api_base",929                api_key_field="fireworks_api_key",930                provider_path="fireworks",931                base_url_env="FIREWORKS_API_BASE",932                api_key_env="FIREWORKS_API_KEY",933            )934            if config.api_key is None:935                msg = (936                    "You must specify an api key. "937                    "You can pass it an argument as `api_key=...` or "938                    "set the environment variable `FIREWORKS_API_KEY`."939                )940                raise ValueError(msg)941        return values942943    @model_validator(mode="after")944    def _set_fireworks_chat_version(self) -> Self:945        """Set package version in metadata."""946        self._add_version("langchain-fireworks", __version__)947        return self948949    @model_validator(mode="after")950    def validate_environment(self) -> Self:951        """Validate that api key and python package exists in environment."""952        if self.n < 1:953            msg = "n must be at least 1."954            raise ValueError(msg)955        if self.n > 1 and self.streaming:956            msg = "n must be 1 when streaming."957            raise ValueError(msg)958959        api_key = self.fireworks_api_key.get_secret_value()960        base_url = self.fireworks_api_base961        # 0.x accepted a `(connect, read)` tuple. 1.x's SDK only accepts a962        # float, `httpx.Timeout`, or `None` — normalize so existing user code963        # keeps working.964        if isinstance(self.request_timeout, tuple):965            connect, read = self.request_timeout966            timeout: Any = httpx.Timeout(read, connect=connect)967        else:968            timeout = self.request_timeout969        # `langchain-fireworks` owns retry/backoff via `_create_retry_decorator`970        # so the LangChain `run_manager` sees each attempt. Suppress the971        # SDK's built-in retry layer to avoid double-retrying.972        if not self.client:973            self._sdk_client = Fireworks(974                api_key=api_key,975                base_url=base_url,976                timeout=timeout,977                max_retries=0,978            )979            self.client = self._sdk_client.chat.completions980        if not self.async_client:981            self._async_sdk_client = AsyncFireworks(982                api_key=api_key,983                base_url=base_url,984                timeout=timeout,985                max_retries=0,986            )987            self.async_client = self._async_sdk_client.chat.completions988        return self989990    def close(self) -> None:991        """Close the underlying sync HTTP client.992993        After calling, sync invocations on this model will raise. Async994        invocations remain available until `aclose()` is also called. Safe to995        call multiple times.996        """997        if self._sdk_client is not None:998            self._sdk_client.close()9991000    async def aclose(self) -> None:1001        """Close the underlying async HTTP client.10021003        Releases the aiohttp-backed connector that the 1.x SDK uses by1004        default. Without this, transient `ChatFireworks` instances can leak1005        an `Unclosed connector` warning at GC if the event loop has already1006        stopped. Safe to call multiple times.1007        """1008        if self._async_sdk_client is not None:1009            await self._async_sdk_client.close()10101011    def _resolve_model_profile(self) -> ModelProfile | None:1012        return _get_default_model_profile(self.model_name) or None10131014    @property1015    def _default_params(self) -> dict[str, Any]:1016        """Get the default parameters for calling Fireworks API."""1017        params = {1018            "model": self.model_name,1019            "stream": self.streaming,1020            "n": self.n,1021            "stop": self.stop,1022            **self.model_kwargs,1023        }1024        if self.temperature is not None:1025            params["temperature"] = self.temperature1026        if self.max_tokens is not None:1027            params["max_tokens"] = self.max_tokens1028        if self.service_tier is not None:1029            params["service_tier"] = self.service_tier1030        if self.reasoning_effort is not None:1031            params["reasoning_effort"] = self.reasoning_effort1032        return params10331034    def _get_ls_params(1035        self, stop: list[str] | None = None, **kwargs: Any1036    ) -> LangSmithParams:1037        """Get standard params for tracing."""1038        params = self._get_invocation_params(stop=stop, **kwargs)1039        ls_params = LangSmithParams(1040            ls_provider="fireworks",1041            ls_model_name=params.get("model", self.model_name),1042            ls_model_type="chat",1043            ls_temperature=params.get("temperature", self.temperature),1044        )1045        if ls_max_tokens := params.get("max_tokens", self.max_tokens):1046            ls_params["ls_max_tokens"] = ls_max_tokens1047        if ls_stop := stop or params.get("stop", None):1048            ls_params["ls_stop"] = ls_stop1049        return ls_params10501051    def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict:1052        overall_token_usage: dict = {}1053        system_fingerprint = None1054        for output in llm_outputs:1055            if output is None:1056                # Happens in streaming1057                continue1058            token_usage = output.get("token_usage")1059            if token_usage is not None:1060                for k, v in token_usage.items():1061                    if v is None:1062                        continue1063                    if k in overall_token_usage:1064                        overall_token_usage[k] = _update_token_usage(1065                            overall_token_usage[k], v1066                        )1067                    else:1068                        overall_token_usage[k] = v1069            if system_fingerprint is None:1070                system_fingerprint = output.get("system_fingerprint")1071        combined = {"token_usage": overall_token_usage, "model_name": self.model_name}1072        if system_fingerprint:1073            combined["system_fingerprint"] = system_fingerprint1074        return combined10751076    def _stream(1077        self,1078        messages: list[BaseMessage],1079        stop: list[str] | None = None,1080        run_manager: CallbackManagerForLLMRun | None = None,1081        **kwargs: Any,1082    ) -> Iterator[ChatGenerationChunk]:1083        message_dicts, params = self._create_message_dicts(messages, stop)1084        params = {**params, **kwargs, "stream": True}1085        if self.stream_usage and "stream_options" not in params:1086            params["stream_options"] = {"include_usage": True}10871088        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1089        try:1090            stream = _completion_with_retry(1091                self, run_manager=run_manager, messages=message_dicts, **params1092            )1093        except BadRequestError as e:1094            _handle_fireworks_invalid_request(e)1095        for chunk in stream:1096            if not isinstance(chunk, dict):1097                chunk = chunk.model_dump()1098            message_chunk = _convert_chunk_to_message_chunk(chunk, default_chunk_class)1099            generation_info: dict[str, Any] = {}1100            logprobs = None1101            if choices := chunk.get("choices"):1102                choice = choices[0]1103                if finish_reason := choice.get("finish_reason"):1104                    generation_info["finish_reason"] = finish_reason1105                    generation_info["model_name"] = self.model_name1106                logprobs = choice.get("logprobs")1107                if logprobs:1108                    generation_info["logprobs"] = logprobs1109            default_chunk_class = message_chunk.__class__1110            generation_chunk = ChatGenerationChunk(1111                message=message_chunk, generation_info=generation_info or None1112            )1113            if run_manager:1114                run_manager.on_llm_new_token(1115                    generation_chunk.text, chunk=generation_chunk, logprobs=logprobs1116                )1117            yield generation_chunk11181119    def _generate(1120        self,1121        messages: list[BaseMessage],1122        stop: list[str] | None = None,1123        run_manager: CallbackManagerForLLMRun | None = None,1124        stream: bool | None = None,  # noqa: FBT0011125        **kwargs: Any,1126    ) -> ChatResult:1127        should_stream = stream if stream is not None else self.streaming1128        if should_stream:1129            stream_iter = self._stream(1130                messages, stop=stop, run_manager=run_manager, **kwargs1131            )1132            return generate_from_stream(stream_iter)1133        message_dicts, params = self._create_message_dicts(messages, stop)1134        params = {1135            **params,1136            **({"stream": stream} if stream is not None else {}),1137            **kwargs,1138        }1139        try:1140            response = _completion_with_retry(1141                self, run_manager=run_manager, messages=message_dicts, **params1142            )1143        except BadRequestError as e:1144            _handle_fireworks_invalid_request(e)1145        return self._create_chat_result(response)11461147    def _create_message_dicts(1148        self, messages: list[BaseMessage], stop: list[str] | None1149    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:1150        params = self._default_params1151        if stop is not None:1152            params["stop"] = stop1153        message_dicts = [_convert_message_to_dict(m) for m in messages]1154        return message_dicts, params11551156    def _create_chat_result(self, response: dict | BaseModel) -> ChatResult:1157        generations = []1158        if not isinstance(response, dict):1159            response = response.model_dump()1160        token_usage = response.get("usage", {})1161        service_tier = response.get("service_tier")1162        for res in response["choices"]:1163            message = _convert_dict_to_message(res["message"])1164            if isinstance(message, AIMessage):1165                if token_usage:1166                    message.usage_metadata = _usage_to_metadata(token_usage)1167                    message.response_metadata["model_provider"] = "fireworks"1168                    message.response_metadata["model_name"] = self.model_name1169                if service_tier:1170                    message.response_metadata["service_tier"] = service_tier1171            generation_info = {"finish_reason": res.get("finish_reason")}1172            if "logprobs" in res:1173                generation_info["logprobs"] = res["logprobs"]1174            gen = ChatGeneration(1175                message=message,1176                generation_info=generation_info,1177            )1178            generations.append(gen)1179        llm_output = {1180            "token_usage": token_usage,1181            "system_fingerprint": response.get("system_fingerprint", ""),1182        }1183        if service_tier:1184            llm_output["service_tier"] = service_tier1185        return ChatResult(generations=generations, llm_output=llm_output)11861187    async def _astream(1188        self,1189        messages: list[BaseMessage],1190        stop: list[str] | None = None,1191        run_manager: AsyncCallbackManagerForLLMRun | None = None,1192        **kwargs: Any,1193    ) -> AsyncIterator[ChatGenerationChunk]:1194        message_dicts, params = self._create_message_dicts(messages, stop)1195        params = {**params, **kwargs, "stream": True}1196        if self.stream_usage and "stream_options" not in params:1197            params["stream_options"] = {"include_usage": True}11981199        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1200        try:1201            stream = await _acompletion_with_retry(1202                self, run_manager=run_manager, messages=message_dicts, **params1203            )1204        except BadRequestError as e:1205            _handle_fireworks_invalid_request(e)1206        async for chunk in stream:1207            if not isinstance(chunk, dict):1208                chunk = chunk.model_dump()1209            message_chunk = _convert_chunk_to_message_chunk(chunk, default_chunk_class)1210            generation_info: dict[str, Any] = {}1211            logprobs = None1212            if choices := chunk.get("choices"):1213                choice = choices[0]1214                if finish_reason := choice.get("finish_reason"):1215                    generation_info["finish_reason"] = finish_reason1216                    generation_info["model_name"] = self.model_name1217                logprobs = choice.get("logprobs")1218                if logprobs:1219                    generation_info["logprobs"] = logprobs1220            default_chunk_class = message_chunk.__class__1221            generation_chunk = ChatGenerationChunk(1222                message=message_chunk, generation_info=generation_info or None1223            )1224            if run_manager:1225                await run_manager.on_llm_new_token(1226                    token=generation_chunk.text,1227                    chunk=generation_chunk,1228                    logprobs=logprobs,1229                )1230            yield generation_chunk12311232    async def _agenerate(1233        self,1234        messages: list[BaseMessage],1235        stop: list[str] | None = None,1236        run_manager: AsyncCallbackManagerForLLMRun | None = None,1237        stream: bool | None = None,  # noqa: FBT0011238        **kwargs: Any,1239    ) -> ChatResult:1240        should_stream = stream if stream is not None else self.streaming1241        if should_stream:1242            stream_iter = self._astream(1243                messages, stop=stop, run_manager=run_manager, **kwargs1244            )1245            return await agenerate_from_stream(stream_iter)12461247        message_dicts, params = self._create_message_dicts(messages, stop)1248        params = {1249            **params,1250            **({"stream": stream} if stream is not None else {}),1251            **kwargs,1252        }1253        try:1254            response = await _acompletion_with_retry(1255                self, run_manager=run_manager, messages=message_dicts, **params1256            )1257        except BadRequestError as e:1258            _handle_fireworks_invalid_request(e)1259        return self._create_chat_result(response)12601261    @property1262    def _identifying_params(self) -> dict[str, Any]:1263        """Get the identifying parameters."""1264        return {"model_name": self.model_name, **self._default_params}12651266    def _get_invocation_params(1267        self, stop: list[str] | None = None, **kwargs: Any1268    ) -> dict[str, Any]:1269        """Get the parameters used to invoke the model."""1270        return {1271            "model": self.model_name,1272            **super()._get_invocation_params(stop=stop),1273            **self._default_params,1274            **kwargs,1275        }12761277    @property1278    def _llm_type(self) -> str:1279        """Return type of chat model."""1280        return "fireworks-chat"12811282    def bind_tools(1283        self,1284        tools: Sequence[dict[str, Any] | type[BaseModel] | Callable | BaseTool],1285        *,1286        tool_choice: dict | str | bool | None = None,1287        **kwargs: Any,1288    ) -> Runnable[LanguageModelInput, AIMessage]:1289        """Bind tool-like objects to this chat model.12901291        Assumes model is compatible with Fireworks tool-calling API.12921293        Args:1294            tools: A list of tool definitions to bind to this chat model.12951296                Supports any tool definition handled by [`convert_to_openai_tool`][langchain_core.utils.function_calling.convert_to_openai_tool].1297            tool_choice: Which tool to require the model to call.1298                Must be the name of the single provided function,1299                `'auto'` to automatically determine which function to call1300                with the option to not call any function, `'any'` to enforce that some1301                function is called, or a dict of the form:1302                `{"type": "function", "function": {"name": <<tool_name>>}}`.1303            **kwargs: Any additional parameters to pass to1304                `langchain_fireworks.chat_models.ChatFireworks.bind`1305        """  # noqa: E5011306        strict = kwargs.pop("strict", None)1307        formatted_tools = [1308            convert_to_openai_tool(tool, strict=strict) for tool in tools1309        ]1310        if tool_choice is not None and tool_choice:1311            if isinstance(tool_choice, str) and (1312                tool_choice not in ("auto", "any", "none")1313            ):1314                tool_choice = {"type": "function", "function": {"name": tool_choice}}1315            if isinstance(tool_choice, bool):1316                if len(tools) > 1:1317                    msg = (1318                        "tool_choice can only be True when there is one tool. Received "1319                        f"{len(tools)} tools."1320                    )1321                    raise ValueError(msg)1322                tool_name = formatted_tools[0]["function"]["name"]1323                tool_choice = {1324                    "type": "function",1325                    "function": {"name": tool_name},1326                }13271328            kwargs["tool_choice"] = tool_choice1329        return super().bind(tools=formatted_tools, **kwargs)13301331    def with_structured_output(1332        self,1333        schema: dict | type[BaseModel] | None = None,1334        *,1335        method: Literal[1336            "function_calling", "json_mode", "json_schema"1337        ] = "function_calling",1338        include_raw: bool = False,1339        **kwargs: Any,1340    ) -> Runnable[LanguageModelInput, dict | BaseModel]:1341        """Model wrapper that returns outputs formatted to match the given schema.13421343        Args:1344            schema: The output schema. Can be passed in as:13451346                - An OpenAI function/tool schema,1347                - A JSON Schema,1348                - A `TypedDict` class,1349                - Or a Pydantic class.13501351                If `schema` is a Pydantic class then the model output will be a1352                Pydantic instance of that class, and the model-generated fields will be1353                validated by the Pydantic class. Otherwise the model output will be a1354                dict and will not be validated.13551356                See `langchain_core.utils.function_calling.convert_to_openai_tool` for1357                more on how to properly specify types and descriptions of schema fields1358                when specifying a Pydantic or `TypedDict` class.13591360            method: The method for steering model generation, one of:13611362                - `'function_calling'`:1363                    Uses Fireworks's [tool-calling features](https://docs.fireworks.ai/guides/function-calling).1364                - `'json_schema'`:1365                    Uses Fireworks's [structured output feature](https://docs.fireworks.ai/structured-responses/structured-response-formatting).1366                - `'json_mode'`:1367                    Uses Fireworks's [JSON mode feature](https://docs.fireworks.ai/structured-responses/structured-response-formatting).13681369                !!! warning "Behavior changed in `langchain-fireworks` 0.2.8"13701371                    Added support for `'json_schema'`.13721373            include_raw:1374                If `False` then only the parsed structured output is returned.13751376                If an error occurs during model output parsing it will be raised.13771378                If `True` then both the raw model response (a `BaseMessage`) and the1379                parsed model response will be returned.13801381                If an error occurs during output parsing it will be caught and returned1382                as well.13831384                The final output is always a `dict` with keys `'raw'`, `'parsed'`, and1385                `'parsing_error'`.13861387            kwargs:1388                Any additional parameters to pass to the `langchain.runnable.Runnable`1389                constructor.13901391        Returns:1392            A `Runnable` that takes same inputs as a1393                `langchain_core.language_models.chat.BaseChatModel`. If `include_raw` is1394                `False` and `schema` is a Pydantic class, `Runnable` outputs an instance1395                of `schema` (i.e., a Pydantic object). Otherwise, if `include_raw` is1396                `False` then `Runnable` outputs a `dict`.13971398                If `include_raw` is `True`, then `Runnable` outputs a `dict` with keys:13991400                - `'raw'`: `BaseMessage`1401                - `'parsed'`: `None` if there was a parsing error, otherwise the type1402                    depends on the `schema` as described above.1403                - `'parsing_error'`: `BaseException | None`14041405        Example: schema=Pydantic class, method="function_calling", include_raw=False:14061407        ```python1408        from typing import Optional14091410        from langchain_fireworks import ChatFireworks1411        from pydantic import BaseModel, Field141214131414        class AnswerWithJustification(BaseModel):1415            '''An answer to the user question along with justification for the answer.'''14161417            answer: str1418            # If we provide default values and/or descriptions for fields, these will be passed1419            # to the model. This is an important part of improving a model's ability to1420            # correctly return structured outputs.1421            justification: str | None = Field(1422                default=None, description="A justification for the answer."1423            )142414251426        model = ChatFireworks(1427            model="accounts/fireworks/models/gpt-oss-120b",1428            temperature=0,1429        )1430        structured_model = model.with_structured_output(AnswerWithJustification)14311432        structured_model.invoke(1433            "What weighs more a pound of bricks or a pound of feathers"1434        )14351436        # -> AnswerWithJustification(1437        #     answer='They weigh the same',1438        #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'1439        # )1440        ```14411442        Example: schema=Pydantic class, method="function_calling", include_raw=True:14431444        ```python1445        from langchain_fireworks import ChatFireworks1446        from pydantic import BaseModel144714481449        class AnswerWithJustification(BaseModel):1450            '''An answer to the user question along with justification for the answer.'''14511452            answer: str1453            justification: str145414551456        model = ChatFireworks(1457            model="accounts/fireworks/models/gpt-oss-120b",1458            temperature=0,1459        )1460        structured_model = model.with_structured_output(1461            AnswerWithJustification, include_raw=True1462        )14631464        structured_model.invoke(1465            "What weighs more a pound of bricks or a pound of feathers"1466        )1467        # -> {1468        #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),1469        #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),1470        #     'parsing_error': None1471        # }1472        ```14731474        Example: schema=TypedDict class, method="function_calling", include_raw=False:14751476        ```python1477        from typing_extensions import Annotated, TypedDict14781479        from langchain_fireworks import ChatFireworks148014811482        class AnswerWithJustification(TypedDict):1483            '''An answer to the user question along with justification for the answer.'''14841485            answer: str1486            justification: Annotated[1487                str | None, None, "A justification for the answer."1488            ]148914901491        model = ChatFireworks(1492            model="accounts/fireworks/models/gpt-oss-120b",1493            temperature=0,1494        )1495        structured_model = model.with_structured_output(AnswerWithJustification)14961497        structured_model.invoke(1498            "What weighs more a pound of bricks or a pound of feathers"1499        )1500        # -> {1501        #     'answer': 'They weigh the same',1502        #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'1503        # }1504        ```15051506        Example: schema=OpenAI function schema, method="function_calling", include_raw=False:15071508        ```python1509        from langchain_fireworks import ChatFireworks15101511        oai_schema = {1512            "name": "AnswerWithJustification",1513            "description": "An answer to the user question along with justification for the answer.",1514            "parameters": {1515                "type": "object",1516                "properties": {1517                    "answer": {"type": "string"},1518                    "justification": {1519                        "description": "A justification for the answer.",1520                        "type": "string",1521                    },1522                },1523                "required": ["answer"],1524            },1525        }15261527        model = ChatFireworks(1528            model="accounts/fireworks/models/gpt-oss-120b",1529            temperature=0,1530        )1531        structured_model = model.with_structured_output(oai_schema)15321533        structured_model.invoke(1534            "What weighs more a pound of bricks or a pound of feathers"1535        )1536        # -> {1537        #     'answer': 'They weigh the same',1538        #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'1539        # }1540        ```15411542        Example: schema=Pydantic class, method="json_mode", include_raw=True:15431544        ```python1545        from langchain_fireworks import ChatFireworks1546        from pydantic import BaseModel154715481549        class AnswerWithJustification(BaseModel):1550            answer: str1551            justification: str155215531554        model = ChatFireworks(1555            model="accounts/fireworks/models/gpt-oss-120b", temperature=01556        )1557        structured_model = model.with_structured_output(1558            AnswerWithJustification, method="json_mode", include_raw=True1559        )15601561        structured_model.invoke(1562            "Answer the following question. "1563            "Make sure to return a JSON blob with keys 'answer' and 'justification'. "1564            "What's heavier a pound of bricks or a pound of feathers?"1565        )1566        # -> {1567        #     'raw': AIMessage(content='{"answer": "They are both the same weight.", "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight."}'),1568        #     'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),1569        #     'parsing_error': None1570        # }1571        ```15721573        Example: schema=None, method="json_mode", include_raw=True:15741575        ```python1576        structured_model = model.with_structured_output(1577            method="json_mode", include_raw=True1578        )15791580        structured_model.invoke(1581            "Answer the following question. "1582            "Make sure to return a JSON blob with keys 'answer' and 'justification'. "1583            "What's heavier a pound of bricks or a pound of feathers?"1584        )1585        # -> {1586        #     'raw': AIMessage(content='{"answer": "They are both the same weight.", "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight."}'),1587        #     'parsed': {1588        #         'answer': 'They are both the same weight.',1589        #         'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'1590        #     },1591        #     'parsing_error': None1592        # }1593        ```15941595        """  # noqa: E5011596        _ = kwargs.pop("strict", None)1597        if kwargs:1598            msg = f"Received unsupported arguments {kwargs}"1599            raise ValueError(msg)1600        is_pydantic_schema = _is_pydantic_class(schema)1601        if method == "function_calling":1602            if schema is None:1603                msg = (1604                    "schema must be specified when method is 'function_calling'. "1605                    "Received None."1606                )1607                raise ValueError(msg)1608            formatted_tool = convert_to_openai_tool(schema)1609            tool_name = formatted_tool["function"]["name"]1610            llm = self.bind_tools(1611                [schema],1612                tool_choice=tool_name,1613                ls_structured_output_format={1614                    "kwargs": {"method": "function_calling"},1615                    "schema": formatted_tool,1616                },1617            )1618            if is_pydantic_schema:1619                output_parser: OutputParserLike = PydanticToolsParser(1620                    tools=[schema],  # type: ignore[list-item]1621                    first_tool_only=True,  # type: ignore[list-item]1622                )1623            else:1624                output_parser = JsonOutputKeyToolsParser(1625                    key_name=tool_name, first_tool_only=True1626                )1627        elif method == "json_schema":1628            if schema is None:1629                msg = (1630                    "schema must be specified when method is 'json_schema'. "1631                    "Received None."1632                )1633                raise ValueError(msg)1634            formatted_schema = convert_to_json_schema(schema)1635            llm = self.bind(1636                response_format={"type": "json_object", "schema": formatted_schema},1637                ls_structured_output_format={1638                    "kwargs": {"method": "json_schema"},1639                    "schema": schema,1640                },1641            )1642            output_parser = (1643                PydanticOutputParser(pydantic_object=schema)  # type: ignore[arg-type]1644                if is_pydantic_schema1645                else JsonOutputParser()1646            )1647        elif method == "json_mode":1648            llm = self.bind(1649                response_format={"type": "json_object"},1650                ls_structured_output_format={1651                    "kwargs": {"method": "json_mode"},1652                    "schema": schema,1653                },1654            )1655            output_parser = (1656                PydanticOutputParser(pydantic_object=schema)  # type: ignore[type-var, arg-type]1657                if is_pydantic_schema1658                else JsonOutputParser()1659            )1660        else:1661            msg = (1662                f"Unrecognized method argument. Expected one of 'function_calling' or "1663                f"'json_mode'. Received: '{method}'"1664            )1665            raise ValueError(msg)16661667        if include_raw:1668            parser_assign = RunnablePassthrough.assign(1669                parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None1670            )1671            parser_none = RunnablePassthrough.assign(parsed=lambda _: None)1672            parser_with_fallback = parser_assign.with_fallbacks(1673                [parser_none], exception_key="parsing_error"1674            )1675            return RunnableMap(raw=llm) | parser_with_fallback1676        return llm | output_parser167716781679def _is_pydantic_class(obj: Any) -> bool:1680    return isinstance(obj, type) and is_basemodel_subclass(obj)168116821683def _lc_tool_call_to_fireworks_tool_call(tool_call: ToolCall) -> dict:1684    return {1685        "type": "function",1686        "id": tool_call["id"],1687        "function": {1688            "name": tool_call["name"],1689            "arguments": json.dumps(tool_call["args"], ensure_ascii=False),1690        },1691    }169216931694def _lc_invalid_tool_call_to_fireworks_tool_call(1695    invalid_tool_call: InvalidToolCall,1696) -> dict:1697    return {1698        "type": "function",1699        "id": invalid_tool_call["id"],1700        "function": {1701            "name": invalid_tool_call["name"],1702            "arguments": invalid_tool_call["args"],1703        },1704    }
Code quality findings 32

Overuse may indicate design issues; consider polymorphism
L233
isinstance-overuse
if not isinstance(content, list):
Overuse may indicate design issues; consider polymorphism
L237
isinstance-overuse
if isinstance(block, dict):
Overuse may indicate design issues; consider polymorphism
L245
isinstance-overuse
and isinstance(sanitized[0], dict)
Overuse may indicate design issues; consider polymorphism
L248
isinstance-overuse
and isinstance(sanitized[0]["text"], str)
Overuse may indicate design issues; consider polymorphism
L274
isinstance-overuse
if not isinstance(content, list):
Overuse may indicate design issues; consider polymorphism
L278
isinstance-overuse
if isinstance(block, dict) and "type" in block:
Overuse may indicate design issues; consider polymorphism
L296
isinstance-overuse
and isinstance(source, dict)
Overuse may indicate design issues; consider polymorphism
L329
isinstance-overuse
if isinstance(message, ChatMessage):
Overuse may indicate design issues; consider polymorphism
L336
isinstance-overuse
elif isinstance(message, HumanMessage):
Overuse may indicate design issues; consider polymorphism
L343
isinstance-overuse
elif isinstance(message, AIMessage):
Overuse may indicate design issues; consider polymorphism
L372
isinstance-overuse
elif isinstance(message, SystemMessage):
Overuse may indicate design issues; consider polymorphism
L379
isinstance-overuse
elif isinstance(message, FunctionMessage):
Overuse may indicate design issues; consider polymorphism
L385
isinstance-overuse
elif isinstance(message, ToolMessage):
Overuse may indicate design issues; consider polymorphism
L438
isinstance-overuse
if isinstance(new_usage, int):
Overuse may indicate design issues; consider polymorphism
L439
isinstance-overuse
if not isinstance(overall_token_usage, int):
Overuse may indicate design issues; consider polymorphism
L447
isinstance-overuse
if isinstance(new_usage, dict):
Overuse may indicate design issues; consider polymorphism
L448
isinstance-overuse
if not isinstance(overall_token_usage, dict):
Overuse may indicate design issues; consider polymorphism
L460
isinstance-overuse
default: TokenUsageTree = {} if isinstance(value, dict) else 0
Use isinstance() for type checking instead of type()
L465
type-check
logger.warning("Unexpected type for token usage: %s", type(new_usage).__name__)
Ensure functions have docstrings for documentation
L752
missing-docstring
def lc_secrets(self) -> dict[str, str]:
Ensure functions have docstrings for documentation
L765
missing-docstring
def lc_attributes(self) -> dict[str, Any]:
Overuse may indicate design issues; consider polymorphism
L924
isinstance-overuse
if isinstance(values, dict):
Overuse may indicate design issues; consider polymorphism
L964
isinstance-overuse
if isinstance(self.request_timeout, tuple):
Overuse may indicate design issues; consider polymorphism
L1096
isinstance-overuse
if not isinstance(chunk, dict):
Overuse may indicate design issues; consider polymorphism
L1158
isinstance-overuse
if not isinstance(response, dict):
Overuse may indicate design issues; consider polymorphism
L1164
isinstance-overuse
if isinstance(message, AIMessage):
Overuse may indicate design issues; consider polymorphism
L1207
isinstance-overuse
if not isinstance(chunk, dict):
Ensure functions have docstrings for documentation
L1282
missing-docstring
def bind_tools(
Overuse may indicate design issues; consider polymorphism
L1311
isinstance-overuse
if isinstance(tool_choice, str) and (
Overuse may indicate design issues; consider polymorphism
L1315
isinstance-overuse
if isinstance(tool_choice, bool):
Ensure functions have docstrings for documentation
L1331
missing-docstring
def with_structured_output(
Overuse may indicate design issues; consider polymorphism
L1680
isinstance-overuse
return isinstance(obj, type) and is_basemodel_subclass(obj)
Code quality findings 32

Get this view in your editor