libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py · langchain-ai/langchain

1"""Hugging Face Chat Wrapper."""23from __future__ import annotations45import contextlib6import json7from collections.abc import AsyncIterator, Callable, Iterator, Mapping, Sequence8from dataclasses import dataclass9from operator import itemgetter10from typing import TYPE_CHECKING, Any, Literal, cast1112if TYPE_CHECKING:13    from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint14    from langchain_huggingface.llms.huggingface_pipeline import HuggingFacePipeline1516from langchain_core.callbacks.manager import (17    AsyncCallbackManagerForLLMRun,18    CallbackManagerForLLMRun,19)20from langchain_core.language_models import (21    LanguageModelInput,22    ModelProfile,23    ModelProfileRegistry,24)25from langchain_core.language_models.chat_models import (26    BaseChatModel,27    agenerate_from_stream,28    generate_from_stream,29)30from langchain_core.messages import (31    AIMessage,32    AIMessageChunk,33    BaseMessage,34    BaseMessageChunk,35    ChatMessage,36    ChatMessageChunk,37    FunctionMessage,38    FunctionMessageChunk,39    HumanMessage,40    HumanMessageChunk,41    InvalidToolCall,42    SystemMessage,43    SystemMessageChunk,44    ToolCall,45    ToolMessage,46    ToolMessageChunk,47)48from langchain_core.messages.tool import ToolCallChunk49from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk50from langchain_core.output_parsers import JsonOutputParser51from langchain_core.output_parsers.openai_tools import (52    JsonOutputKeyToolsParser,53    make_invalid_tool_call,54    parse_tool_call,55)56from langchain_core.outputs import (57    ChatGeneration,58    ChatGenerationChunk,59    ChatResult,60    LLMResult,61)62from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough63from langchain_core.tools import BaseTool64from langchain_core.utils.function_calling import (65    convert_to_json_schema,66    convert_to_openai_tool,67)68from langchain_core.utils.pydantic import is_basemodel_subclass69from pydantic import BaseModel, Field, model_validator70from typing_extensions import Self7172from langchain_huggingface._version import __version__73from langchain_huggingface.data._profiles import _PROFILES74from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint75from langchain_huggingface.llms.huggingface_pipeline import HuggingFacePipeline7677_MODEL_PROFILES = cast("ModelProfileRegistry", _PROFILES)787980def _get_default_model_profile(model_name: str) -> ModelProfile:81    default = _MODEL_PROFILES.get(model_name) or {}82    return default.copy()838485@dataclass86class TGI_RESPONSE:87    """Response from the TextGenInference API."""8889    choices: list[Any]90    usage: dict919293@dataclass94class TGI_MESSAGE:95    """Message to send to the TextGenInference API."""9697    role: str98    content: str99    tool_calls: list[dict]100101102def _lc_tool_call_to_hf_tool_call(tool_call: ToolCall) -> dict:103    return {104        "type": "function",105        "id": tool_call["id"],106        "function": {107            "name": tool_call["name"],108            "arguments": json.dumps(tool_call["args"], ensure_ascii=False),109        },110    }111112113def _lc_invalid_tool_call_to_hf_tool_call(114    invalid_tool_call: InvalidToolCall,115) -> dict:116    return {117        "type": "function",118        "id": invalid_tool_call["id"],119        "function": {120            "name": invalid_tool_call["name"],121            "arguments": invalid_tool_call["args"],122        },123    }124125126def _convert_message_to_dict(message: BaseMessage) -> dict:127    """Convert a LangChain message to a dictionary.128129    Args:130        message: The LangChain message.131132    Returns:133        The dictionary.134135    """136    message_dict: dict[str, Any]137    if isinstance(message, ChatMessage):138        message_dict = {"role": message.role, "content": message.content}139    elif isinstance(message, HumanMessage):140        message_dict = {"role": "user", "content": message.content}141    elif isinstance(message, AIMessage):142        message_dict = {"role": "assistant", "content": message.content}143        if "function_call" in message.additional_kwargs:144            message_dict["function_call"] = message.additional_kwargs["function_call"]145            # If function call only, content is None not empty string146            if message_dict["content"] == "":147                message_dict["content"] = None148        if message.tool_calls or message.invalid_tool_calls:149            message_dict["tool_calls"] = [150                _lc_tool_call_to_hf_tool_call(tc) for tc in message.tool_calls151            ] + [152                _lc_invalid_tool_call_to_hf_tool_call(tc)153                for tc in message.invalid_tool_calls154            ]155        elif "tool_calls" in message.additional_kwargs:156            message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]157        # If tool calls only, content is None not empty string158        if "tool_calls" in message_dict and message_dict["content"] == "":159            message_dict["content"] = None160        else:161            pass162    elif isinstance(message, SystemMessage):163        message_dict = {"role": "system", "content": message.content}164    elif isinstance(message, FunctionMessage):165        message_dict = {166            "role": "function",167            "content": message.content,168            "name": message.name,169        }170    elif isinstance(message, ToolMessage):171        message_dict = {172            "role": "tool",173            "content": message.content,174            "tool_call_id": message.tool_call_id,175        }176    else:177        msg = f"Got unknown type {message}"178        raise TypeError(msg)179    if "name" in message.additional_kwargs:180        message_dict["name"] = message.additional_kwargs["name"]181    return message_dict182183184def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:185    """Convert a dictionary to a LangChain message.186187    Args:188        _dict: The dictionary.189190    Returns:191        The LangChain message.192193    """194    role = _dict.get("role")195    if role == "user":196        return HumanMessage(content=_dict.get("content", ""))197    if role == "assistant":198        content = _dict.get("content", "") or ""199        additional_kwargs: dict = {}200        if function_call := _dict.get("function_call"):201            additional_kwargs["function_call"] = dict(function_call)202        tool_calls = []203        invalid_tool_calls = []204        if raw_tool_calls := _dict.get("tool_calls"):205            additional_kwargs["tool_calls"] = raw_tool_calls206            for raw_tool_call in raw_tool_calls:207                try:208                    tool_calls.append(parse_tool_call(raw_tool_call, return_id=True))209                except Exception as e:210                    invalid_tool_calls.append(211                        dict(make_invalid_tool_call(raw_tool_call, str(e)))212                    )213        return AIMessage(214            content=content,215            additional_kwargs=additional_kwargs,216            tool_calls=tool_calls,217            invalid_tool_calls=invalid_tool_calls,218        )219    if role == "system":220        return SystemMessage(content=_dict.get("content", ""))221    if role == "function":222        return FunctionMessage(223            content=_dict.get("content", ""), name=_dict.get("name", "")224        )225    if role == "tool":226        additional_kwargs = {}227        if "name" in _dict:228            additional_kwargs["name"] = _dict["name"]229        return ToolMessage(230            content=_dict.get("content", ""),231            tool_call_id=_dict.get("tool_call_id", ""),232            additional_kwargs=additional_kwargs,233        )234    return ChatMessage(content=_dict.get("content", ""), role=role or "")235236237def _is_huggingface_hub(llm: Any) -> bool:238    try:239        from langchain_community.llms.huggingface_hub import (240            HuggingFaceHub,  # type: ignore[import-not-found]241        )242243        return isinstance(llm, HuggingFaceHub)244    except ImportError:245        # if no langchain community, it is not a HuggingFaceHub246        return False247248249def _convert_chunk_to_message_chunk(250    chunk: Mapping[str, Any], default_class: type[BaseMessageChunk]251) -> BaseMessageChunk:252    choice = chunk["choices"][0]253    _dict = choice["delta"]254    role = cast(str, _dict.get("role"))255    content = cast(str, _dict.get("content") or "")256    additional_kwargs: dict = {}257    tool_call_chunks: list[ToolCallChunk] = []258    if _dict.get("function_call"):259        function_call = dict(_dict["function_call"])260        if "name" in function_call and function_call["name"] is None:261            function_call["name"] = ""262        additional_kwargs["function_call"] = function_call263    if raw_tool_calls := _dict.get("tool_calls"):264        additional_kwargs["tool_calls"] = raw_tool_calls265        for rtc in raw_tool_calls:266            with contextlib.suppress(KeyError):267                tool_call_chunks.append(268                    create_tool_call_chunk(269                        name=rtc["function"].get("name"),270                        args=rtc["function"].get("arguments"),271                        id=rtc.get("id"),272                        index=rtc.get("index"),273                    )274                )275    if role == "user" or default_class == HumanMessageChunk:276        return HumanMessageChunk(content=content)277    if role == "assistant" or default_class == AIMessageChunk:278        if usage := chunk.get("usage"):279            input_tokens = usage.get("prompt_tokens", 0)280            output_tokens = usage.get("completion_tokens", 0)281            usage_metadata = {282                "input_tokens": input_tokens,283                "output_tokens": output_tokens,284                "total_tokens": usage.get("total_tokens", input_tokens + output_tokens),285            }286        else:287            usage_metadata = None288        return AIMessageChunk(289            content=content,290            additional_kwargs=additional_kwargs,291            tool_call_chunks=tool_call_chunks,292            usage_metadata=usage_metadata,  # type: ignore[arg-type]293        )294    if role == "system" or default_class == SystemMessageChunk:295        return SystemMessageChunk(content=content)296    if role == "function" or default_class == FunctionMessageChunk:297        return FunctionMessageChunk(content=content, name=_dict["name"])298    if role == "tool" or default_class == ToolMessageChunk:299        return ToolMessageChunk(content=content, tool_call_id=_dict["tool_call_id"])300    if role or default_class == ChatMessageChunk:301        return ChatMessageChunk(content=content, role=role)302    return default_class(content=content)  # type: ignore[call-arg]303304305def _is_huggingface_textgen_inference(llm: Any) -> bool:306    try:307        from langchain_community.llms.huggingface_text_gen_inference import (308            HuggingFaceTextGenInference,  # type: ignore[import-not-found]309        )310311        return isinstance(llm, HuggingFaceTextGenInference)312    except ImportError:313        # if no langchain community, it is not a HuggingFaceTextGenInference314        return False315316317def _is_huggingface_endpoint(llm: Any) -> bool:318    return isinstance(llm, HuggingFaceEndpoint)319320321def _is_huggingface_pipeline(llm: Any) -> bool:322    return isinstance(llm, HuggingFacePipeline)323324325class ChatHuggingFace(BaseChatModel):326    r"""Hugging Face LLM's as ChatModels.327328    Works with `HuggingFaceTextGenInference`, `HuggingFaceEndpoint`,329    `HuggingFaceHub`, and `HuggingFacePipeline` LLMs.330331    Upon instantiating this class, the model_id is resolved from the url332    provided to the LLM, and the appropriate tokenizer is loaded from333    the HuggingFace Hub.334335    Setup:336        Install `langchain-huggingface` and ensure your Hugging Face token337        is saved.338339        ```bash340        pip install langchain-huggingface341        ```342343        ```python344        from huggingface_hub import login345346        login()  # You will be prompted for your HF key, which will then be saved locally347        ```348349    Key init args — completion params:350        llm:351            LLM to be used.352353    Key init args — client params:354        custom_get_token_ids:355            Optional encoder to use for counting tokens.356        metadata:357            Metadata to add to the run trace.358        tags:359            Tags to add to the run trace.360        verbose:361            Whether to print out response text.362363    See full list of supported init args and their descriptions in the params364    section.365366    Instantiate:367        ```python368        from langchain_huggingface import HuggingFaceEndpoint,369        ChatHuggingFace370371        model = HuggingFaceEndpoint(372            repo_id="microsoft/Phi-3-mini-4k-instruct",373            task="text-generation",374            max_new_tokens=512,375            do_sample=False,376            repetition_penalty=1.03,377        )378379        chat = ChatHuggingFace(llm=model, verbose=True)380        ```381382    Invoke:383        ```python384        messages = [385            ("system", "You are a helpful translator. Translate the user386            sentence to French."),387            ("human", "I love programming."),388        ]389390        chat(...).invoke(messages)391        ```392393        ```python394        AIMessage(content='Je ai une passion pour le programme.\n\nIn395        French, we use "ai" for masculine subjects and "a" for feminine396        subjects. Since "programming" is gender-neutral in English, we397        will go with the masculine "programme".\n\nConfirmation: "J\'aime398        le programme." is more commonly used. The sentence above is399        technically accurate, but less commonly used in spoken French as400        "ai" is used less frequently in everyday speech.',401        response_metadata={'token_usage': ChatCompletionOutputUsage402        (completion_tokens=100, prompt_tokens=55, total_tokens=155),403        'model': '', 'finish_reason': 'length'},404        id='run-874c24b7-0272-4c99-b259-5d6d7facbc56-0')405        ```406407    Stream:408        ```python409        for chunk in chat.stream(messages):410            print(chunk)411        ```412413        ```python414        content='Je ai une passion pour le programme.\n\nIn French, we use415        "ai" for masculine subjects and "a" for feminine subjects.416        Since "programming" is gender-neutral in English,417        we will go with the masculine "programme".\n\nConfirmation:418        "J\'aime le programme." is more commonly used. The sentence419        above is technically accurate, but less commonly used in spoken420        French as "ai" is used less frequently in everyday speech.'421        response_metadata={'token_usage': ChatCompletionOutputUsage422        (completion_tokens=100, prompt_tokens=55, total_tokens=155),423        'model': '', 'finish_reason': 'length'}424        id='run-7d7b1967-9612-4f9a-911a-b2b5ca85046a-0'425        ```426427    Async:428        ```python429        await chat.ainvoke(messages)430        ```431432        ```python433        AIMessage(content='Je déaime le programming.\n\nLittérale : Je434        (j\'aime) déaime (le) programming.\n\nNote: "Programming" in435        French is "programmation". But here, I used "programming" instead436        of "programmation" because the user said "I love programming"437        instead of "I love programming (in French)", which would be438        "J\'aime la programmation". By translating the sentence439        literally, I preserved the original meaning of the user\'s440        sentence.', id='run-fd850318-e299-4735-b4c6-3496dc930b1d-0')441        ```442443    Tool calling:444        ```python445        from pydantic import BaseModel, Field446447        class GetWeather(BaseModel):448            '''Get the current weather in a given location'''449450            location: str = Field(..., description="The city and state,451            e.g. San Francisco, CA")452453        class GetPopulation(BaseModel):454            '''Get the current population in a given location'''455456            location: str = Field(..., description="The city and state,457            e.g. San Francisco, CA")458459        chat_with_tools = chat.bind_tools([GetWeather, GetPopulation])460        ai_msg = chat_with_tools.invoke("Which city is hotter today and461        which is bigger: LA or NY?")462        ai_msg.tool_calls463        ```464465        ```python466        [467            {468                "name": "GetPopulation",469                "args": {"location": "Los Angeles, CA"},470                "id": "0",471            }472        ]473        ```474475    Response metadata476        ```python477        ai_msg = chat.invoke(messages)478        ai_msg.response_metadata479        ```480481        ```python482        {483            "token_usage": ChatCompletionOutputUsage(484                completion_tokens=100, prompt_tokens=8, total_tokens=108485            ),486            "model": "",487            "finish_reason": "length",488        }489        ```490    """  # noqa: E501491492    llm: Any493    """LLM, must be of type HuggingFaceTextGenInference, HuggingFaceEndpoint,494        HuggingFaceHub, or HuggingFacePipeline."""495    tokenizer: Any = None496    """Tokenizer for the model. Only used for HuggingFacePipeline."""497    model_id: str | None = None498    """Model ID for the model. Only used for HuggingFaceEndpoint."""499    temperature: float | None = None500    """What sampling temperature to use."""501    stop: str | list[str] | None = Field(default=None, alias="stop_sequences")502    """Default stop sequences."""503    presence_penalty: float | None = None504    """Penalizes repeated tokens."""505    frequency_penalty: float | None = None506    """Penalizes repeated tokens according to frequency."""507    seed: int | None = None508    """Seed for generation"""509    logprobs: bool | None = None510    """Whether to return logprobs."""511    top_logprobs: int | None = None512    """Number of most likely tokens to return at each token position, each with513     an associated log probability. `logprobs` must be set to true514     if this parameter is used."""515    logit_bias: dict[int, int] | None = None516    """Modify the likelihood of specified tokens appearing in the completion."""517    streaming: bool = False518    """Whether to stream the results or not."""519    stream_usage: bool | None = None520    """Whether to include usage metadata in streaming output. If True, an additional521    message chunk will be generated during the stream including usage metadata."""522    n: int | None = None523    """Number of chat completions to generate for each prompt."""524    top_p: float | None = None525    """Total probability mass of tokens to consider at each step."""526    max_tokens: int | None = None527    """Maximum number of tokens to generate."""528    model_kwargs: dict[str, Any] = Field(default_factory=dict)529    """Holds any model parameters valid for `create` call not explicitly specified."""530531    def __init__(self, **kwargs: Any):532        super().__init__(**kwargs)533534        # Inherit properties from the LLM if they weren't explicitly set535        self._inherit_llm_properties()536537        self._resolve_model_id()538539    def _inherit_llm_properties(self) -> None:540        """Inherit properties from the wrapped LLM instance if not explicitly set."""541        if not hasattr(self, "llm") or self.llm is None:542            return543544        # Map of ChatHuggingFace properties to LLM properties545        property_mappings = {546            "temperature": "temperature",547            "max_tokens": "max_new_tokens",  # Different naming convention548            "top_p": "top_p",549            "seed": "seed",550            "streaming": "streaming",551            "stop": "stop_sequences",552        }553554        # Inherit properties from LLM and not explicitly set here555        for chat_prop, llm_prop in property_mappings.items():556            if hasattr(self.llm, llm_prop):557                llm_value = getattr(self.llm, llm_prop)558                chat_value = getattr(self, chat_prop, None)559                if not chat_value and llm_value:560                    setattr(self, chat_prop, llm_value)561562        # Handle special cases for HuggingFaceEndpoint563        if _is_huggingface_endpoint(self.llm):564            # Inherit additional HuggingFaceEndpoint specific properties565            endpoint_mappings = {566                "frequency_penalty": "repetition_penalty",567            }568569            for chat_prop, llm_prop in endpoint_mappings.items():570                if hasattr(self.llm, llm_prop):571                    llm_value = getattr(self.llm, llm_prop)572                    chat_value = getattr(self, chat_prop, None)573                    if chat_value is None and llm_value is not None:574                        setattr(self, chat_prop, llm_value)575576        # Inherit model_kwargs if not explicitly set577        if (578            not self.model_kwargs579            and hasattr(self.llm, "model_kwargs")580            and isinstance(self.llm.model_kwargs, dict)581        ):582            self.model_kwargs = self.llm.model_kwargs.copy()583584    @model_validator(mode="after")585    def _set_huggingface_version(self) -> Self:586        """Set package version in metadata."""587        self._add_version("langchain-huggingface", __version__)588        return self589590    @model_validator(mode="after")591    def validate_llm(self) -> Self:592        if (593            not _is_huggingface_hub(self.llm)594            and not _is_huggingface_textgen_inference(self.llm)595            and not _is_huggingface_endpoint(self.llm)596            and not _is_huggingface_pipeline(self.llm)597        ):598            msg = (599                "Expected llm to be one of HuggingFaceTextGenInference, "600                "HuggingFaceEndpoint, HuggingFaceHub, HuggingFacePipeline "601                f"received {type(self.llm)}"602            )603            raise TypeError(msg)604        return self605606    def _resolve_model_profile(self) -> ModelProfile | None:607        if self.model_id:608            return _get_default_model_profile(self.model_id) or None609        return None610611    @classmethod612    def from_model_id(613        cls,614        model_id: str,615        task: str | None = None,616        backend: Literal["pipeline", "endpoint", "text-gen"] = "pipeline",617        **kwargs: Any,618    ) -> ChatHuggingFace:619        """Construct a ChatHuggingFace model from a model_id.620621        Args:622            model_id: The model ID of the Hugging Face model.623            task: The task to perform (e.g., "text-generation").624            backend: The backend to use. One of "pipeline", "endpoint", "text-gen".625            **kwargs: Additional arguments to pass to the backend or ChatHuggingFace.626        """627        llm: (628            Any  # HuggingFacePipeline, HuggingFaceEndpoint, HuggingFaceTextGenInference629        )630        if backend == "pipeline":631            from langchain_huggingface.llms.huggingface_pipeline import (632                HuggingFacePipeline,633            )634635            task = task if task is not None else "text-generation"636637            # Separate pipeline-specific kwargs from ChatHuggingFace kwargs638            # Parameters that should go to HuggingFacePipeline.from_model_id639            pipeline_specific_kwargs = {}640641            # Extract pipeline-specific parameters642            pipeline_keys = [643                "backend",644                "device",645                "device_map",646                "model_kwargs",647                "pipeline_kwargs",648                "batch_size",649            ]650            for key in pipeline_keys:651                if key in kwargs:652                    pipeline_specific_kwargs[key] = kwargs.pop(key)653654            # Remaining kwargs (temperature, max_tokens, etc.) should go to655            # pipeline_kwargs for generation parameters, which ChatHuggingFace656            # will inherit from the LLM657            if "pipeline_kwargs" not in pipeline_specific_kwargs:658                pipeline_specific_kwargs["pipeline_kwargs"] = {}659660            # Add generation parameters to pipeline_kwargs661            # Map max_tokens to max_new_tokens for HuggingFace pipeline662            generation_params = {}663            for k, v in list(kwargs.items()):664                if k == "max_tokens":665                    generation_params["max_new_tokens"] = v666                    kwargs.pop(k)667                elif k in (668                    "temperature",669                    "max_new_tokens",670                    "top_p",671                    "top_k",672                    "repetition_penalty",673                    "do_sample",674                ):675                    generation_params[k] = v676                    kwargs.pop(k)677678            pipeline_specific_kwargs["pipeline_kwargs"].update(generation_params)679680            # Create the HuggingFacePipeline681            llm = HuggingFacePipeline.from_model_id(682                model_id=model_id, task=task, **pipeline_specific_kwargs683            )684        elif backend == "endpoint":685            from langchain_huggingface.llms.huggingface_endpoint import (686                HuggingFaceEndpoint,687            )688689            llm = HuggingFaceEndpoint(repo_id=model_id, task=task, **kwargs)690        elif backend == "text-gen":691            from langchain_community.llms.huggingface_text_gen_inference import (  # type: ignore[import-not-found]692                HuggingFaceTextGenInference,693            )694695            llm = HuggingFaceTextGenInference(inference_server_url=model_id, **kwargs)696        else:697            msg = f"Unknown backend: {backend}"698            raise ValueError(msg)699700        return cls(llm=llm, **kwargs)701702    def _create_chat_result(self, response: dict) -> ChatResult:703        generations = []704        token_usage = response.get("usage", {})705        for res in response["choices"]:706            message = _convert_dict_to_message(res["message"])707            if token_usage and isinstance(message, AIMessage):708                message.usage_metadata = {709                    "input_tokens": token_usage.get("prompt_tokens", 0),710                    "output_tokens": token_usage.get("completion_tokens", 0),711                    "total_tokens": token_usage.get("total_tokens", 0),712                }713            generation_info = {"finish_reason": res.get("finish_reason")}714            if "logprobs" in res:715                generation_info["logprobs"] = res["logprobs"]716            gen = ChatGeneration(717                message=message,718                generation_info=generation_info,719            )720            generations.append(gen)721        llm_output = {722            "token_usage": token_usage,723            "model_name": self.model_id,724            "system_fingerprint": response.get("system_fingerprint", ""),725        }726        return ChatResult(generations=generations, llm_output=llm_output)727728    def _generate(729        self,730        messages: list[BaseMessage],731        stop: list[str] | None = None,732        run_manager: CallbackManagerForLLMRun | None = None,733        stream: bool | None = None,  # noqa: FBT001734        **kwargs: Any,735    ) -> ChatResult:736        should_stream = stream if stream is not None else self.streaming737738        if _is_huggingface_textgen_inference(self.llm):739            message_dicts, params = self._create_message_dicts(messages, stop)740            answer = self.llm.client.chat(messages=message_dicts, **kwargs)741            return self._create_chat_result(answer)742        if _is_huggingface_endpoint(self.llm):743            if should_stream:744                stream_iter = self._stream(745                    messages, stop=stop, run_manager=run_manager, **kwargs746                )747                return generate_from_stream(stream_iter)748            message_dicts, params = self._create_message_dicts(messages, stop)749            params = {750                "stop": stop,751                **params,752                **({"stream": stream} if stream is not None else {}),753                **kwargs,754            }755            answer = self.llm.client.chat_completion(messages=message_dicts, **params)756            return self._create_chat_result(answer)757        llm_input = self._to_chat_prompt(messages)758759        if should_stream:760            stream_iter = self.llm._stream(761                llm_input, stop=stop, run_manager=run_manager, **kwargs762            )763            return generate_from_stream(stream_iter)764        llm_result = self.llm._generate(765            prompts=[llm_input], stop=stop, run_manager=run_manager, **kwargs766        )767        return self._to_chat_result(llm_result)768769    async def _agenerate(770        self,771        messages: list[BaseMessage],772        stop: list[str] | None = None,773        run_manager: AsyncCallbackManagerForLLMRun | None = None,774        stream: bool | None = None,  # noqa: FBT001775        **kwargs: Any,776    ) -> ChatResult:777        if _is_huggingface_textgen_inference(self.llm):778            message_dicts, params = self._create_message_dicts(messages, stop)779            answer = await self.llm.async_client.chat(messages=message_dicts, **kwargs)780            return self._create_chat_result(answer)781        if _is_huggingface_endpoint(self.llm):782            should_stream = stream if stream is not None else self.streaming783            if should_stream:784                stream_iter = self._astream(785                    messages, stop=stop, run_manager=run_manager, **kwargs786                )787                return await agenerate_from_stream(stream_iter)788            message_dicts, params = self._create_message_dicts(messages, stop)789            params = {790                **params,791                **({"stream": stream} if stream is not None else {}),792                **kwargs,793            }794795            answer = await self.llm.async_client.chat_completion(796                messages=message_dicts, **params797            )798            return self._create_chat_result(answer)799        if _is_huggingface_pipeline(self.llm):800            msg = "async generation is not supported with HuggingFacePipeline"801            raise NotImplementedError(msg)802        llm_input = self._to_chat_prompt(messages)803        llm_result = await self.llm._agenerate(804            prompts=[llm_input], stop=stop, run_manager=run_manager, **kwargs805        )806        return self._to_chat_result(llm_result)807808    def _should_stream_usage(809        self, *, stream_usage: bool | None = None, **kwargs: Any810    ) -> bool | None:811        """Determine whether to include usage metadata in streaming output.812813        For backwards compatibility, we check for `stream_options` passed814        explicitly to kwargs or in the model_kwargs and override self.stream_usage.815        """816        stream_usage_sources = [  # order of precedence817            stream_usage,818            kwargs.get("stream_options", {}).get("include_usage"),819            self.model_kwargs.get("stream_options", {}).get("include_usage"),820            self.stream_usage,821        ]822        for source in stream_usage_sources:823            if isinstance(source, bool):824                return source825        return self.stream_usage826827    def _stream(828        self,829        messages: list[BaseMessage],830        stop: list[str] | None = None,831        run_manager: CallbackManagerForLLMRun | None = None,832        *,833        stream_usage: bool | None = None,834        **kwargs: Any,835    ) -> Iterator[ChatGenerationChunk]:836        if _is_huggingface_endpoint(self.llm):837            stream_usage = self._should_stream_usage(838                stream_usage=stream_usage, **kwargs839            )840            if stream_usage:841                kwargs["stream_options"] = {"include_usage": stream_usage}842            message_dicts, params = self._create_message_dicts(messages, stop)843            params = {**params, **kwargs, "stream": True}844845            default_chunk_class: type[BaseMessageChunk] = AIMessageChunk846            for chunk in self.llm.client.chat_completion(847                messages=message_dicts, **params848            ):849                if len(chunk["choices"]) == 0:850                    if usage := chunk.get("usage"):851                        usage_msg = AIMessageChunk(852                            content="",853                            additional_kwargs={},854                            response_metadata={},855                            usage_metadata={856                                "input_tokens": usage.get("prompt_tokens", 0),857                                "output_tokens": usage.get("completion_tokens", 0),858                                "total_tokens": usage.get("total_tokens", 0),859                            },860                        )861                        yield ChatGenerationChunk(message=usage_msg)862                    continue863864                choice = chunk["choices"][0]865                message_chunk = _convert_chunk_to_message_chunk(866                    chunk, default_chunk_class867                )868                generation_info = {}869                if finish_reason := choice.get("finish_reason"):870                    generation_info["finish_reason"] = finish_reason871                    generation_info["model_name"] = self.model_id872                logprobs = choice.get("logprobs")873                if logprobs:874                    generation_info["logprobs"] = logprobs875                default_chunk_class = message_chunk.__class__876                generation_chunk = ChatGenerationChunk(877                    message=message_chunk, generation_info=generation_info or None878                )879                if run_manager:880                    run_manager.on_llm_new_token(881                        generation_chunk.text, chunk=generation_chunk, logprobs=logprobs882                    )883                yield generation_chunk884        else:885            llm_input = self._to_chat_prompt(messages)886            stream_iter = self.llm._stream(887                llm_input, stop=stop, run_manager=run_manager, **kwargs888            )889            for chunk in stream_iter:  # chunk is a GenerationChunk890                chat_chunk = ChatGenerationChunk(891                    message=AIMessageChunk(content=chunk.text),892                    generation_info=chunk.generation_info,893                )894                yield chat_chunk895896    async def _astream(897        self,898        messages: list[BaseMessage],899        stop: list[str] | None = None,900        run_manager: AsyncCallbackManagerForLLMRun | None = None,901        *,902        stream_usage: bool | None = None,903        **kwargs: Any,904    ) -> AsyncIterator[ChatGenerationChunk]:905        stream_usage = self._should_stream_usage(stream_usage=stream_usage, **kwargs)906        if stream_usage:907            kwargs["stream_options"] = {"include_usage": stream_usage}908        message_dicts, params = self._create_message_dicts(messages, stop)909        params = {**params, **kwargs, "stream": True}910911        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk912913        async for chunk in await self.llm.async_client.chat_completion(914            messages=message_dicts, **params915        ):916            if len(chunk["choices"]) == 0:917                if usage := chunk.get("usage"):918                    usage_msg = AIMessageChunk(919                        content="",920                        additional_kwargs={},921                        response_metadata={},922                        usage_metadata={923                            "input_tokens": usage.get("prompt_tokens", 0),924                            "output_tokens": usage.get("completion_tokens", 0),925                            "total_tokens": usage.get("total_tokens", 0),926                        },927                    )928                    yield ChatGenerationChunk(message=usage_msg)929                continue930931            choice = chunk["choices"][0]932            message_chunk = _convert_chunk_to_message_chunk(chunk, default_chunk_class)933            generation_info = {}934            if finish_reason := choice.get("finish_reason"):935                generation_info["finish_reason"] = finish_reason936                generation_info["model_name"] = self.model_id937            logprobs = choice.get("logprobs")938            if logprobs:939                generation_info["logprobs"] = logprobs940            default_chunk_class = message_chunk.__class__941            generation_chunk = ChatGenerationChunk(942                message=message_chunk, generation_info=generation_info or None943            )944            if run_manager:945                await run_manager.on_llm_new_token(946                    token=generation_chunk.text,947                    chunk=generation_chunk,948                    logprobs=logprobs,949                )950            yield generation_chunk951952    def _to_chat_prompt(953        self,954        messages: list[BaseMessage],955    ) -> str:956        """Convert a list of messages into a prompt format expected by wrapped LLM."""957        if not messages:958            msg = "At least one HumanMessage must be provided!"959            raise ValueError(msg)960961        if not isinstance(messages[-1], HumanMessage):962            msg = "Last message must be a HumanMessage!"963            raise ValueError(msg)964965        messages_dicts = [self._to_chatml_format(m) for m in messages]966967        return self.tokenizer.apply_chat_template(968            messages_dicts, tokenize=False, add_generation_prompt=True969        )970971    def _to_chatml_format(self, message: BaseMessage) -> dict:972        """Convert LangChain message to ChatML format."""973        if isinstance(message, SystemMessage):974            role = "system"975        elif isinstance(message, AIMessage):976            role = "assistant"977        elif isinstance(message, HumanMessage):978            role = "user"979        else:980            msg = f"Unknown message type: {type(message)}"981            raise ValueError(msg)982983        return {"role": role, "content": message.content}984985    @staticmethod986    def _to_chat_result(llm_result: LLMResult) -> ChatResult:987        chat_generations = []988989        for g in llm_result.generations[0]:990            chat_generation = ChatGeneration(991                message=AIMessage(content=g.text), generation_info=g.generation_info992            )993            chat_generations.append(chat_generation)994995        return ChatResult(996            generations=chat_generations, llm_output=llm_result.llm_output997        )998999    def _resolve_model_id(self) -> None:1000        """Resolve the model_id from the LLM's inference_server_url."""1001        from huggingface_hub import list_inference_endpoints  # type: ignore[import]10021003        if _is_huggingface_hub(self.llm) or (1004            hasattr(self.llm, "repo_id") and self.llm.repo_id1005        ):1006            self.model_id = self.llm.repo_id1007            return1008        if _is_huggingface_textgen_inference(self.llm):1009            endpoint_url: str | None = self.llm.inference_server_url1010        if _is_huggingface_pipeline(self.llm):1011            from transformers import AutoTokenizer  # type: ignore[import]10121013            self.model_id = self.model_id or self.llm.model_id1014            self.tokenizer = (1015                AutoTokenizer.from_pretrained(self.model_id)1016                if self.tokenizer is None1017                else self.tokenizer1018            )1019            return1020        if _is_huggingface_endpoint(self.llm):1021            self.model_id = self.llm.repo_id or self.llm.model1022            return1023        endpoint_url = self.llm.endpoint_url1024        available_endpoints = list_inference_endpoints("*")1025        for endpoint in available_endpoints:1026            if endpoint.url == endpoint_url:1027                self.model_id = endpoint.repository10281029        if not self.model_id:1030            msg = (1031                "Failed to resolve model_id:"1032                f"Could not find model id for inference server: {endpoint_url}"1033                "Make sure that your Hugging Face token has access to the endpoint."1034            )1035            raise ValueError(msg)10361037    def bind_tools(1038        self,1039        tools: Sequence[dict[str, Any] | type | Callable | BaseTool],1040        *,1041        tool_choice: dict | str | bool | None = None,1042        **kwargs: Any,1043    ) -> Runnable[LanguageModelInput, AIMessage]:1044        """Bind tool-like objects to this chat model.10451046        Assumes model is compatible with OpenAI tool-calling API.10471048        Args:1049            tools: A list of tool definitions to bind to this chat model.10501051                Supports any tool definition handled by [`convert_to_openai_tool`][langchain_core.utils.function_calling.convert_to_openai_tool].1052            tool_choice: Which tool to require the model to call.1053                Must be the name of the single provided function or1054                `'auto'` to automatically determine which function to call1055                (if any), or a dict of the form:1056                {"type": "function", "function": {"name": <<tool_name>>}}.1057            **kwargs: Any additional parameters to pass to the1058                `langchain.runnable.Runnable` constructor.1059        """  # noqa: E5011060        formatted_tools = [convert_to_openai_tool(tool) for tool in tools]1061        if tool_choice is not None and tool_choice:1062            if len(formatted_tools) != 1:1063                msg = (1064                    "When specifying `tool_choice`, you must provide exactly one "1065                    f"tool. Received {len(formatted_tools)} tools."1066                )1067                raise ValueError(msg)1068            if isinstance(tool_choice, str):1069                if tool_choice not in ("auto", "none", "required"):1070                    tool_choice = {1071                        "type": "function",1072                        "function": {"name": tool_choice},1073                    }1074            elif isinstance(tool_choice, bool):1075                tool_choice = formatted_tools[0]1076            elif isinstance(tool_choice, dict):1077                if (1078                    formatted_tools[0]["function"]["name"]1079                    != tool_choice["function"]["name"]1080                ):1081                    msg = (1082                        f"Tool choice {tool_choice} was specified, but the only "1083                        f"provided tool was {formatted_tools[0]['function']['name']}."1084                    )1085                    raise ValueError(msg)1086            else:1087                msg = (1088                    f"Unrecognized tool_choice type. Expected str, bool or dict. "1089                    f"Received: {tool_choice}"1090                )1091                raise ValueError(msg)1092            kwargs["tool_choice"] = tool_choice1093        return super().bind(tools=formatted_tools, **kwargs)10941095    def with_structured_output(1096        self,1097        schema: dict | type[BaseModel] | None = None,1098        *,1099        method: Literal[1100            "function_calling", "json_mode", "json_schema"1101        ] = "function_calling",1102        include_raw: bool = False,1103        **kwargs: Any,1104    ) -> Runnable[LanguageModelInput, dict | BaseModel]:1105        """Model wrapper that returns outputs formatted to match the given schema.11061107        Args:1108            schema: The output schema. Can be passed in as:11091110                - An OpenAI function/tool schema,1111                - A JSON Schema,1112                - A `TypedDict` class11131114                Pydantic class is currently supported.11151116            method: The method for steering model generation, one of:11171118                - `'function_calling'`: uses tool-calling features.1119                - `'json_schema'`: uses dedicated structured output features.1120                - `'json_mode'`: uses JSON mode.11211122            include_raw:1123                If `False` then only the parsed structured output is returned.11241125                If an error occurs during model output parsing it will be raised.11261127                If `True` then both the raw model response (a `BaseMessage`) and the1128                parsed model response will be returned.11291130                If an error occurs during output parsing it will be caught and returned1131                as well.11321133                The final output is always a `dict` with keys `'raw'`, `'parsed'`, and1134                `'parsing_error'`.11351136            kwargs:1137                Additional parameters to pass to the underlying LLM's1138                `langchain_core.language_models.chat.BaseChatModel.bind`1139                method, such as `response_format` or `ls_structured_output_format`.11401141        Returns:1142            A `Runnable` that takes same inputs as a1143                `langchain_core.language_models.chat.BaseChatModel`. If `include_raw` is1144                `False` and `schema` is a Pydantic class, `Runnable` outputs an instance1145                of `schema` (i.e., a Pydantic object). Otherwise, if `include_raw` is1146                `False` then `Runnable` outputs a `dict`.11471148                If `include_raw` is `True`, then `Runnable` outputs a `dict` with keys:11491150                - `'raw'`: `BaseMessage`1151                - `'parsed'`: `None` if there was a parsing error, otherwise the type1152                    depends on the `schema` as described above.1153                - `'parsing_error'`: `BaseException | None`1154        """1155        _ = kwargs.pop("strict", None)1156        if kwargs:1157            msg = f"Received unsupported arguments {kwargs}"1158            raise ValueError(msg)1159        is_pydantic_schema = isinstance(schema, type) and is_basemodel_subclass(schema)1160        if method == "function_calling":1161            if schema is None:1162                msg = (1163                    "schema must be specified when method is 'function_calling'. "1164                    "Received None."1165                )1166                raise ValueError(msg)1167            formatted_tool = convert_to_openai_tool(schema)1168            tool_name = formatted_tool["function"]["name"]1169            llm = self.bind_tools(1170                [schema],1171                tool_choice=tool_name,1172                ls_structured_output_format={1173                    "kwargs": {"method": "function_calling"},1174                    "schema": formatted_tool,1175                },1176            )1177            if is_pydantic_schema:1178                msg = "Pydantic schema is not supported for function calling"1179                raise NotImplementedError(msg)1180            output_parser: JsonOutputKeyToolsParser | JsonOutputParser = (1181                JsonOutputKeyToolsParser(key_name=tool_name, first_tool_only=True)1182            )1183        elif method == "json_schema":1184            if schema is None:1185                msg = (1186                    "schema must be specified when method is 'json_schema'. "1187                    "Received None."1188                )1189                raise ValueError(msg)1190            formatted_schema = convert_to_json_schema(schema)1191            llm = self.bind(1192                response_format={"type": "json_object", "schema": formatted_schema},1193                ls_structured_output_format={1194                    "kwargs": {"method": "json_schema"},1195                    "schema": schema,1196                },1197            )1198            output_parser = JsonOutputParser()  # type: ignore[arg-type]1199        elif method == "json_mode":1200            llm = self.bind(1201                response_format={"type": "json_object"},1202                ls_structured_output_format={1203                    "kwargs": {"method": "json_mode"},1204                    "schema": schema,1205                },1206            )1207            output_parser = JsonOutputParser()  # type: ignore[arg-type]1208        else:1209            msg = (1210                f"Unrecognized method argument. Expected one of 'function_calling' or "1211                f"'json_mode'. Received: '{method}'"1212            )1213            raise ValueError(msg)12141215        if include_raw:1216            parser_assign = RunnablePassthrough.assign(1217                parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None1218            )1219            parser_none = RunnablePassthrough.assign(parsed=lambda _: None)1220            parser_with_fallback = parser_assign.with_fallbacks(1221                [parser_none], exception_key="parsing_error"1222            )1223            return RunnableMap(raw=llm) | parser_with_fallback1224        return llm | output_parser12251226    def _create_message_dicts(1227        self, messages: list[BaseMessage], stop: list[str] | None1228    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:1229        params = self._default_params1230        if stop is not None:1231            params["stop"] = stop1232        message_dicts = [_convert_message_to_dict(m) for m in messages]1233        return message_dicts, params12341235    @property1236    def _default_params(self) -> dict[str, Any]:1237        """Get default parameters for calling Hugging Face Inference Providers API."""1238        params = {1239            "model": self.model_id,1240            "stream": self.streaming,1241            "n": self.n,1242            "temperature": self.temperature,1243            "stop": self.stop,1244            **(self.model_kwargs if self.model_kwargs else {}),1245        }1246        if self.max_tokens is not None:1247            params["max_tokens"] = self.max_tokens1248        return params12491250    @property1251    def _llm_type(self) -> str:1252        return "huggingface-chat-wrapper"
Code quality findings 27

Overuse may indicate design issues; consider polymorphism
L137
isinstance-overuse
if isinstance(message, ChatMessage):
Overuse may indicate design issues; consider polymorphism
L139
isinstance-overuse
elif isinstance(message, HumanMessage):
Overuse may indicate design issues; consider polymorphism
L141
isinstance-overuse
elif isinstance(message, AIMessage):
Overuse may indicate design issues; consider polymorphism
L162
isinstance-overuse
elif isinstance(message, SystemMessage):
Overuse may indicate design issues; consider polymorphism
L164
isinstance-overuse
elif isinstance(message, FunctionMessage):
Overuse may indicate design issues; consider polymorphism
L170
isinstance-overuse
elif isinstance(message, ToolMessage):
Overuse may indicate design issues; consider polymorphism
L243
isinstance-overuse
return isinstance(llm, HuggingFaceHub)
Overuse may indicate design issues; consider polymorphism
L311
isinstance-overuse
return isinstance(llm, HuggingFaceTextGenInference)
Overuse may indicate design issues; consider polymorphism
L318
isinstance-overuse
return isinstance(llm, HuggingFaceEndpoint)
Overuse may indicate design issues; consider polymorphism
L322
isinstance-overuse
return isinstance(llm, HuggingFacePipeline)
Use logging module for better control and configurability
L410
print-statement
print(chunk)
Overuse may indicate design issues; consider polymorphism
L580
isinstance-overuse
and isinstance(self.llm.model_kwargs, dict)
Ensure functions have docstrings for documentation
L591
missing-docstring
def validate_llm(self) -> Self:
Ensure functions have docstrings for documentation
L612
missing-docstring
def from_model_id(
Avoid unnecessary list conversions; use generators where possible
L663
unnecessary-list
for k, v in list(kwargs.items()):
Overuse may indicate design issues; consider polymorphism
L707
isinstance-overuse
if token_usage and isinstance(message, AIMessage):
Overuse may indicate design issues; consider polymorphism
L823
isinstance-overuse
if isinstance(source, bool):
Overuse may indicate design issues; consider polymorphism
L961
isinstance-overuse
if not isinstance(messages[-1], HumanMessage):
Overuse may indicate design issues; consider polymorphism
L973
isinstance-overuse
if isinstance(message, SystemMessage):
Overuse may indicate design issues; consider polymorphism
L975
isinstance-overuse
elif isinstance(message, AIMessage):
Overuse may indicate design issues; consider polymorphism
L977
isinstance-overuse
elif isinstance(message, HumanMessage):
Ensure functions have docstrings for documentation
L1037
missing-docstring
def bind_tools(
Overuse may indicate design issues; consider polymorphism
L1068
isinstance-overuse
if isinstance(tool_choice, str):
Overuse may indicate design issues; consider polymorphism
L1074
isinstance-overuse
elif isinstance(tool_choice, bool):
Overuse may indicate design issues; consider polymorphism
L1076
isinstance-overuse
elif isinstance(tool_choice, dict):
Ensure functions have docstrings for documentation
L1095
missing-docstring
def with_structured_output(
Overuse may indicate design issues; consider polymorphism
L1159
isinstance-overuse
is_pydantic_schema = isinstance(schema, type) and is_basemodel_subclass(schema)
Code quality findings 27

Get this view in your editor