libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py PYTHON 423 lines View on github.com → Search inside
1from __future__ import annotations  # type: ignore[import-not-found]23import importlib.util4import logging5from collections.abc import Iterator, Mapping6from typing import Any78from langchain_core.callbacks import CallbackManagerForLLMRun9from langchain_core.language_models.llms import BaseLLM10from langchain_core.outputs import Generation, GenerationChunk, LLMResult11from pydantic import ConfigDict, model_validator1213from langchain_huggingface.utils.import_utils import (14    IMPORT_ERROR,15    is_ipex_available,16    is_openvino_available,17    is_optimum_intel_available,18    is_optimum_intel_version,19)2021DEFAULT_MODEL_ID = "gpt2"22DEFAULT_TASK = "text-generation"23VALID_TASKS = (24    "text2text-generation",25    "text-generation",26    "image-text-to-text",27    "summarization",28    "translation",29)30DEFAULT_BATCH_SIZE = 431_MIN_OPTIMUM_VERSION = "1.21"323334logger = logging.getLogger(__name__)353637class HuggingFacePipeline(BaseLLM):38    """HuggingFace Pipeline API.3940    To use, you should have the `transformers` python package installed.4142    Only supports `text-generation`, `text2text-generation`, `image-text-to-text`,43    `summarization` and `translation`  for now.4445    Example using from_model_id:46        ```python47        from langchain_huggingface import HuggingFacePipeline4849        hf = HuggingFacePipeline.from_model_id(50            model_id="gpt2",51            task="text-generation",52            pipeline_kwargs={"max_new_tokens": 10},53        )54        ```5556    Example passing pipeline in directly:57        ```python58        from langchain_huggingface import HuggingFacePipeline59        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline6061        model_id = "gpt2"62        tokenizer = AutoTokenizer.from_pretrained(model_id)63        model = AutoModelForCausalLM.from_pretrained(model_id)64        pipe = pipeline(65            "text-generation",66            model=model,67            tokenizer=tokenizer,68            max_new_tokens=10,69        )70        hf = HuggingFacePipeline(pipeline=pipe)71        ```72    """7374    pipeline: Any = None7576    model_id: str | None = None77    """The model name. If not set explicitly by the user,78    it will be inferred from the provided pipeline (if available).79    If neither is provided, the DEFAULT_MODEL_ID will be used."""8081    model_kwargs: dict | None = None82    """Keyword arguments passed to the model."""8384    pipeline_kwargs: dict | None = None85    """Keyword arguments passed to the pipeline."""8687    batch_size: int = DEFAULT_BATCH_SIZE88    """Batch size to use when passing multiple documents to generate."""8990    model_config = ConfigDict(91        extra="forbid",92    )9394    @model_validator(mode="before")95    @classmethod96    def pre_init_validator(cls, values: dict[str, Any]) -> dict[str, Any]:97        """Ensure model_id is set either by pipeline or user input."""98        if "model_id" not in values:99            if values.get("pipeline"):100                values["model_id"] = values["pipeline"].model.name_or_path101            else:102                values["model_id"] = DEFAULT_MODEL_ID103        return values104105    @classmethod106    def from_model_id(107        cls,108        model_id: str,109        task: str,110        backend: str = "default",111        device: int | None = None,112        device_map: str | None = None,113        model_kwargs: dict | None = None,114        pipeline_kwargs: dict | None = None,115        batch_size: int = DEFAULT_BATCH_SIZE,116        **kwargs: Any,117    ) -> HuggingFacePipeline:118        """Construct the pipeline object from model_id and task."""119        try:120            from transformers import (  # type: ignore[import]121                AutoModelForCausalLM,122                AutoModelForSeq2SeqLM,123                AutoTokenizer,124            )125            from transformers import pipeline as hf_pipeline  # type: ignore[import]126127        except ImportError as e:128            msg = (129                "Could not import transformers python package. "130                "Please install it with `pip install transformers`."131            )132            raise ValueError(msg) from e133134        _model_kwargs = model_kwargs.copy() if model_kwargs else {}135        if device_map is not None:136            if device is not None:137                msg = (138                    "Both `device` and `device_map` are specified. "139                    "`device` will override `device_map`. "140                    "You will most likely encounter unexpected behavior."141                    "Please remove `device` and keep "142                    "`device_map`."143                )144                raise ValueError(msg)145146            if "device_map" in _model_kwargs:147                msg = "`device_map` is already specified in `model_kwargs`."148                raise ValueError(msg)149150            _model_kwargs["device_map"] = device_map151        tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)152153        if backend in {"openvino", "ipex"}:154            if task not in VALID_TASKS:155                msg = (156                    f"Got invalid task {task}, "157                    f"currently only {VALID_TASKS} are supported"158                )159                raise ValueError(msg)160161            err_msg = f"Backend: {backend} {IMPORT_ERROR.format(f'optimum[{backend}]')}"162            if not is_optimum_intel_available():163                raise ImportError(err_msg)164165            # TODO: upgrade _MIN_OPTIMUM_VERSION to 1.22 after release166            min_optimum_version = (167                "1.22"168                if backend == "ipex" and task != "text-generation"169                else _MIN_OPTIMUM_VERSION170            )171            if is_optimum_intel_version("<", min_optimum_version):172                msg = (173                    f"Backend: {backend} requires optimum-intel>="174                    f"{min_optimum_version}. You can install it with pip: "175                    "`pip install --upgrade --upgrade-strategy eager "176                    f"`optimum[{backend}]`."177                )178                raise ImportError(msg)179180            if backend == "openvino":181                if not is_openvino_available():182                    raise ImportError(err_msg)183184                from optimum.intel import (  # type: ignore[import]185                    OVModelForCausalLM,186                    OVModelForSeq2SeqLM,187                )188189                model_cls = (190                    OVModelForCausalLM191                    if task == "text-generation"192                    else OVModelForSeq2SeqLM193                )194            else:195                if not is_ipex_available():196                    raise ImportError(err_msg)197198                if task == "text-generation":199                    from optimum.intel import (200                        IPEXModelForCausalLM,  # type: ignore[import]201                    )202203                    model_cls = IPEXModelForCausalLM204                else:205                    from optimum.intel import (206                        IPEXModelForSeq2SeqLM,  # type: ignore[import]207                    )208209                    model_cls = IPEXModelForSeq2SeqLM210211        else:212            model_cls = (213                AutoModelForCausalLM214                if task == "text-generation"215                else AutoModelForSeq2SeqLM216            )217218        model = model_cls.from_pretrained(model_id, **_model_kwargs)219220        if tokenizer.pad_token is None:221            if model.config.pad_token_id is not None:222                tokenizer.pad_token_id = model.config.pad_token_id223            elif model.config.eos_token_id is not None and isinstance(224                model.config.eos_token_id, int225            ):226                tokenizer.pad_token_id = model.config.eos_token_id227            elif tokenizer.eos_token_id is not None:228                tokenizer.pad_token_id = tokenizer.eos_token_id229            else:230                tokenizer.add_special_tokens({"pad_token": "[PAD]"})231232        if (233            (234                getattr(model, "is_loaded_in_4bit", False)235                or getattr(model, "is_loaded_in_8bit", False)236            )237            and device is not None238            and backend == "default"239        ):240            logger.warning(241                f"Setting the `device` argument to None from {device} to avoid "242                "the error caused by attempting to move the model that was already "243                "loaded on the GPU using the Accelerate module to the same or "244                "another device."245            )246            device = None247248        if (249            device is not None250            and importlib.util.find_spec("torch") is not None251            and backend == "default"252        ):253            import torch254255            cuda_device_count = torch.cuda.device_count()256            if device < -1 or (device >= cuda_device_count):257                msg = (258                    f"Got device=={device}, "259                    f"device is required to be within [-1, {cuda_device_count})"260                )261                raise ValueError(msg)262            if device_map is not None and device < 0:263                device = None264            if device is not None and device < 0 and cuda_device_count > 0:265                logger.warning(266                    "Device has %d GPUs available. "267                    "Provide device={deviceId} to `from_model_id` to use available"268                    "GPUs for execution. deviceId is -1 (default) for CPU and "269                    "can be a positive integer associated with CUDA device id.",270                    cuda_device_count,271                )272        if device is not None and device_map is not None and backend == "openvino":273            logger.warning("Please set device for OpenVINO through: `model_kwargs`")274        if "trust_remote_code" in _model_kwargs:275            _model_kwargs = {276                k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"277            }278        _pipeline_kwargs = pipeline_kwargs or {}279        pipeline = hf_pipeline(  # type: ignore[call-overload]280            task=task,281            model=model,282            tokenizer=tokenizer,283            device=device,284            batch_size=batch_size,285            model_kwargs=_model_kwargs,286            **_pipeline_kwargs,287        )288        if pipeline.task not in VALID_TASKS:289            msg = (290                f"Got invalid task {pipeline.task}, "291                f"currently only {VALID_TASKS} are supported"292            )293            raise ValueError(msg)294        return cls(295            pipeline=pipeline,296            model_id=model_id,297            model_kwargs=_model_kwargs,298            pipeline_kwargs=_pipeline_kwargs,299            batch_size=batch_size,300            **kwargs,301        )302303    @property304    def _identifying_params(self) -> Mapping[str, Any]:305        """Get the identifying parameters."""306        return {307            "model_id": self.model_id,308            "model_kwargs": self.model_kwargs,309            "pipeline_kwargs": self.pipeline_kwargs,310        }311312    @property313    def _llm_type(self) -> str:314        return "huggingface_pipeline"315316    def _generate(317        self,318        prompts: list[str],319        stop: list[str] | None = None,320        run_manager: CallbackManagerForLLMRun | None = None,321        **kwargs: Any,322    ) -> LLMResult:323        # List to hold all results324        text_generations: list[str] = []325        pipeline_kwargs = kwargs.get("pipeline_kwargs", {})326        skip_prompt = kwargs.get("skip_prompt", False)327328        for i in range(0, len(prompts), self.batch_size):329            batch_prompts = prompts[i : i + self.batch_size]330331            # Process batch of prompts332            responses = self.pipeline(333                batch_prompts,334                **pipeline_kwargs,335            )336337            # Process each response in the batch338            for j, response in enumerate(responses):339                if isinstance(response, list):340                    # if model returns multiple generations, pick the top one341                    response = response[0]342343                if (344                    self.pipeline.task == "text-generation"345                    or self.pipeline.task == "text2text-generation"346                    or self.pipeline.task == "image-text-to-text"347                ):348                    text = response["generated_text"]349                elif self.pipeline.task == "summarization":350                    text = response["summary_text"]351                elif self.pipeline.task in "translation":352                    text = response["translation_text"]353                else:354                    msg = (355                        f"Got invalid task {self.pipeline.task}, "356                        f"currently only {VALID_TASKS} are supported"357                    )358                    raise ValueError(msg)359                if skip_prompt:360                    text = text[len(batch_prompts[j]) :]361                # Append the processed text to results362                text_generations.append(text)363364        return LLMResult(365            generations=[[Generation(text=text)] for text in text_generations]366        )367368    def _stream(369        self,370        prompt: str,371        stop: list[str] | None = None,372        run_manager: CallbackManagerForLLMRun | None = None,373        **kwargs: Any,374    ) -> Iterator[GenerationChunk]:375        from threading import Thread376377        import torch378        from transformers import (379            StoppingCriteria,380            StoppingCriteriaList,381            TextIteratorStreamer,382        )383384        pipeline_kwargs = kwargs.get("pipeline_kwargs", {})385        skip_prompt = kwargs.get("skip_prompt", True)386387        if stop is not None:388            stop = self.pipeline.tokenizer.convert_tokens_to_ids(stop)389        stopping_ids_list = stop or []390391        class StopOnTokens(StoppingCriteria):392            def __call__(393                self,394                input_ids: torch.LongTensor,395                scores: torch.FloatTensor,396                **kwargs: Any,397            ) -> bool:398                return any(input_ids[0][-1] == stop_id for stop_id in stopping_ids_list)399400        stopping_criteria = StoppingCriteriaList([StopOnTokens()])401402        streamer = TextIteratorStreamer(403            self.pipeline.tokenizer,404            timeout=60.0,405            skip_prompt=skip_prompt,406            skip_special_tokens=True,407        )408        generation_kwargs = dict(409            text_inputs=prompt,410            streamer=streamer,411            stopping_criteria=stopping_criteria,412            **pipeline_kwargs,413        )414        t1 = Thread(target=self.pipeline, kwargs=generation_kwargs)415        t1.start()416417        for char in streamer:418            chunk = GenerationChunk(text=char)419            if run_manager:420                run_manager.on_llm_new_token(chunk.text, chunk=chunk)421422            yield chunk

Code quality findings 3

Ensure functions have docstrings for documentation
missing-docstring
def from_model_id(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
elif model.config.eos_token_id is not None and isinstance(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(response, list):

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.