libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py · langchain-ai/langchain

1from __future__ import annotations  # type: ignore[import-not-found]23import importlib.util4import logging5from collections.abc import Iterator, Mapping6from typing import Any78from langchain_core.callbacks import CallbackManagerForLLMRun9from langchain_core.language_models.llms import BaseLLM10from langchain_core.outputs import Generation, GenerationChunk, LLMResult11from pydantic import ConfigDict, model_validator12from typing_extensions import Self1314from langchain_huggingface._version import __version__15from langchain_huggingface.utils.import_utils import (16    IMPORT_ERROR,17    is_ipex_available,18    is_openvino_available,19    is_optimum_intel_available,20    is_optimum_intel_version,21)2223DEFAULT_MODEL_ID = "gpt2"24DEFAULT_TASK = "text-generation"25VALID_TASKS = (26    "text2text-generation",27    "text-generation",28    "image-text-to-text",29    "summarization",30    "translation",31)32DEFAULT_BATCH_SIZE = 433_MIN_OPTIMUM_VERSION = "1.21"343536logger = logging.getLogger(__name__)373839class HuggingFacePipeline(BaseLLM):40    """HuggingFace Pipeline API.4142    To use, you should have the `transformers` python package installed.4344    Only supports `text-generation`, `text2text-generation`, `image-text-to-text`,45    `summarization` and `translation`  for now.4647    Example using from_model_id:48        ```python49        from langchain_huggingface import HuggingFacePipeline5051        hf = HuggingFacePipeline.from_model_id(52            model_id="gpt2",53            task="text-generation",54            pipeline_kwargs={"max_new_tokens": 10},55        )56        ```5758    Example passing pipeline in directly:59        ```python60        from langchain_huggingface import HuggingFacePipeline61        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline6263        model_id = "gpt2"64        tokenizer = AutoTokenizer.from_pretrained(model_id)65        model = AutoModelForCausalLM.from_pretrained(model_id)66        pipe = pipeline(67            "text-generation",68            model=model,69            tokenizer=tokenizer,70            max_new_tokens=10,71        )72        hf = HuggingFacePipeline(pipeline=pipe)73        ```74    """7576    pipeline: Any = None7778    model_id: str | None = None79    """The model name. If not set explicitly by the user,80    it will be inferred from the provided pipeline (if available).81    If neither is provided, the DEFAULT_MODEL_ID will be used."""8283    model_kwargs: dict | None = None84    """Keyword arguments passed to the model."""8586    pipeline_kwargs: dict | None = None87    """Keyword arguments passed to the pipeline."""8889    batch_size: int = DEFAULT_BATCH_SIZE90    """Batch size to use when passing multiple documents to generate."""9192    model_config = ConfigDict(93        extra="forbid",94    )9596    @model_validator(mode="after")97    def _set_huggingface_version(self) -> Self:98        """Set package version in metadata."""99        self._add_version("langchain-huggingface", __version__)100        return self101102    @model_validator(mode="before")103    @classmethod104    def pre_init_validator(cls, values: dict[str, Any]) -> dict[str, Any]:105        """Ensure model_id is set either by pipeline or user input."""106        if "model_id" not in values:107            if values.get("pipeline"):108                values["model_id"] = values["pipeline"].model.name_or_path109            else:110                values["model_id"] = DEFAULT_MODEL_ID111        return values112113    @classmethod114    def from_model_id(115        cls,116        model_id: str,117        task: str,118        backend: str = "default",119        device: int | None = None,120        device_map: str | None = None,121        model_kwargs: dict | None = None,122        pipeline_kwargs: dict | None = None,123        batch_size: int = DEFAULT_BATCH_SIZE,124        **kwargs: Any,125    ) -> HuggingFacePipeline:126        """Construct the pipeline object from model_id and task."""127        try:128            from transformers import (  # type: ignore[import]129                AutoModelForCausalLM,130                AutoModelForSeq2SeqLM,131                AutoTokenizer,132            )133            from transformers import pipeline as hf_pipeline  # type: ignore[import]134135        except ImportError as e:136            msg = (137                "Could not import transformers python package. "138                "Please install it with `pip install transformers`."139            )140            raise ValueError(msg) from e141142        _model_kwargs = model_kwargs.copy() if model_kwargs else {}143        if device_map is not None:144            if device is not None:145                msg = (146                    "Both `device` and `device_map` are specified. "147                    "`device` will override `device_map`. "148                    "You will most likely encounter unexpected behavior."149                    "Please remove `device` and keep "150                    "`device_map`."151                )152                raise ValueError(msg)153154            if "device_map" in _model_kwargs:155                msg = "`device_map` is already specified in `model_kwargs`."156                raise ValueError(msg)157158            _model_kwargs["device_map"] = device_map159        tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)160161        if backend in {"openvino", "ipex"}:162            if task not in VALID_TASKS:163                msg = (164                    f"Got invalid task {task}, "165                    f"currently only {VALID_TASKS} are supported"166                )167                raise ValueError(msg)168169            err_msg = f"Backend: {backend} {IMPORT_ERROR.format(f'optimum[{backend}]')}"170            if not is_optimum_intel_available():171                raise ImportError(err_msg)172173            # TODO: upgrade _MIN_OPTIMUM_VERSION to 1.22 after release174            min_optimum_version = (175                "1.22"176                if backend == "ipex" and task != "text-generation"177                else _MIN_OPTIMUM_VERSION178            )179            if is_optimum_intel_version("<", min_optimum_version):180                msg = (181                    f"Backend: {backend} requires optimum-intel>="182                    f"{min_optimum_version}. You can install it with pip: "183                    "`pip install --upgrade --upgrade-strategy eager "184                    f"`optimum[{backend}]`."185                )186                raise ImportError(msg)187188            if backend == "openvino":189                if not is_openvino_available():190                    raise ImportError(err_msg)191192                from optimum.intel import (  # type: ignore[import]193                    OVModelForCausalLM,194                    OVModelForSeq2SeqLM,195                )196197                model_cls = (198                    OVModelForCausalLM199                    if task == "text-generation"200                    else OVModelForSeq2SeqLM201                )202            else:203                if not is_ipex_available():204                    raise ImportError(err_msg)205206                if task == "text-generation":207                    from optimum.intel import (208                        IPEXModelForCausalLM,  # type: ignore[import]209                    )210211                    model_cls = IPEXModelForCausalLM212                else:213                    from optimum.intel import (214                        IPEXModelForSeq2SeqLM,  # type: ignore[import]215                    )216217                    model_cls = IPEXModelForSeq2SeqLM218219        else:220            model_cls = (221                AutoModelForCausalLM222                if task == "text-generation"223                else AutoModelForSeq2SeqLM224            )225226        model = model_cls.from_pretrained(model_id, **_model_kwargs)227228        if tokenizer.pad_token is None:229            if model.config.pad_token_id is not None:230                tokenizer.pad_token_id = model.config.pad_token_id231            elif model.config.eos_token_id is not None and isinstance(232                model.config.eos_token_id, int233            ):234                tokenizer.pad_token_id = model.config.eos_token_id235            elif tokenizer.eos_token_id is not None:236                tokenizer.pad_token_id = tokenizer.eos_token_id237            else:238                tokenizer.add_special_tokens({"pad_token": "[PAD]"})239240        if (241            (242                getattr(model, "is_loaded_in_4bit", False)243                or getattr(model, "is_loaded_in_8bit", False)244            )245            and device is not None246            and backend == "default"247        ):248            logger.warning(249                f"Setting the `device` argument to None from {device} to avoid "250                "the error caused by attempting to move the model that was already "251                "loaded on the GPU using the Accelerate module to the same or "252                "another device."253            )254            device = None255256        if (257            device is not None258            and importlib.util.find_spec("torch") is not None259            and backend == "default"260        ):261            import torch262263            cuda_device_count = torch.cuda.device_count()264            if device < -1 or (device >= cuda_device_count):265                msg = (266                    f"Got device=={device}, "267                    f"device is required to be within [-1, {cuda_device_count})"268                )269                raise ValueError(msg)270            if device_map is not None and device < 0:271                device = None272            if device is not None and device < 0 and cuda_device_count > 0:273                logger.warning(274                    "Device has %d GPUs available. "275                    "Provide device={deviceId} to `from_model_id` to use available"276                    "GPUs for execution. deviceId is -1 (default) for CPU and "277                    "can be a positive integer associated with CUDA device id.",278                    cuda_device_count,279                )280        if device is not None and device_map is not None and backend == "openvino":281            logger.warning("Please set device for OpenVINO through: `model_kwargs`")282        if "trust_remote_code" in _model_kwargs:283            _model_kwargs = {284                k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"285            }286        _pipeline_kwargs = pipeline_kwargs or {}287        pipeline = hf_pipeline(  # type: ignore[call-overload]288            task=task,289            model=model,290            tokenizer=tokenizer,291            device=device,292            batch_size=batch_size,293            model_kwargs=_model_kwargs,294            **_pipeline_kwargs,295        )296        if pipeline.task not in VALID_TASKS:297            msg = (298                f"Got invalid task {pipeline.task}, "299                f"currently only {VALID_TASKS} are supported"300            )301            raise ValueError(msg)302        return cls(303            pipeline=pipeline,304            model_id=model_id,305            model_kwargs=_model_kwargs,306            pipeline_kwargs=_pipeline_kwargs,307            batch_size=batch_size,308            **kwargs,309        )310311    @property312    def _identifying_params(self) -> Mapping[str, Any]:313        """Get the identifying parameters."""314        return {315            "model_id": self.model_id,316            "model_kwargs": self.model_kwargs,317            "pipeline_kwargs": self.pipeline_kwargs,318        }319320    @property321    def _llm_type(self) -> str:322        return "huggingface_pipeline"323324    def _generate(325        self,326        prompts: list[str],327        stop: list[str] | None = None,328        run_manager: CallbackManagerForLLMRun | None = None,329        **kwargs: Any,330    ) -> LLMResult:331        # List to hold all results332        text_generations: list[str] = []333        pipeline_kwargs = kwargs.get("pipeline_kwargs", {})334        skip_prompt = kwargs.get("skip_prompt", False)335336        for i in range(0, len(prompts), self.batch_size):337            batch_prompts = prompts[i : i + self.batch_size]338339            # Process batch of prompts340            responses = self.pipeline(341                batch_prompts,342                **pipeline_kwargs,343            )344345            # Process each response in the batch346            for j, response in enumerate(responses):347                if isinstance(response, list):348                    # if model returns multiple generations, pick the top one349                    response = response[0]350351                if (352                    self.pipeline.task == "text-generation"353                    or self.pipeline.task == "text2text-generation"354                    or self.pipeline.task == "image-text-to-text"355                ):356                    text = response["generated_text"]357                elif self.pipeline.task == "summarization":358                    text = response["summary_text"]359                elif self.pipeline.task in "translation":360                    text = response["translation_text"]361                else:362                    msg = (363                        f"Got invalid task {self.pipeline.task}, "364                        f"currently only {VALID_TASKS} are supported"365                    )366                    raise ValueError(msg)367                if skip_prompt:368                    text = text[len(batch_prompts[j]) :]369                # Append the processed text to results370                text_generations.append(text)371372        return LLMResult(373            generations=[[Generation(text=text)] for text in text_generations]374        )375376    def _stream(377        self,378        prompt: str,379        stop: list[str] | None = None,380        run_manager: CallbackManagerForLLMRun | None = None,381        **kwargs: Any,382    ) -> Iterator[GenerationChunk]:383        from threading import Thread384385        import torch386        from transformers import (387            StoppingCriteria,388            StoppingCriteriaList,389            TextIteratorStreamer,390        )391392        pipeline_kwargs = kwargs.get("pipeline_kwargs", {})393        skip_prompt = kwargs.get("skip_prompt", True)394395        if stop is not None:396            stop = self.pipeline.tokenizer.convert_tokens_to_ids(stop)397        stopping_ids_list = stop or []398399        class StopOnTokens(StoppingCriteria):400            def __call__(401                self,402                input_ids: torch.LongTensor,403                scores: torch.FloatTensor,404                **kwargs: Any,405            ) -> bool:406                return any(input_ids[0][-1] == stop_id for stop_id in stopping_ids_list)407408        stopping_criteria = StoppingCriteriaList([StopOnTokens()])409410        streamer = TextIteratorStreamer(411            self.pipeline.tokenizer,412            timeout=60.0,413            skip_prompt=skip_prompt,414            skip_special_tokens=True,415        )416        generation_kwargs = dict(417            text_inputs=prompt,418            streamer=streamer,419            stopping_criteria=stopping_criteria,420            **pipeline_kwargs,421        )422        t1 = Thread(target=self.pipeline, kwargs=generation_kwargs)423        t1.start()424425        for char in streamer:426            chunk = GenerationChunk(text=char)427            if run_manager:428                run_manager.on_llm_new_token(chunk.text, chunk=chunk)429430            yield chunk
Code quality findings 3

Ensure functions have docstrings for documentation
L114
missing-docstring
def from_model_id(
Overuse may indicate design issues; consider polymorphism
L231
isinstance-overuse
elif model.config.eos_token_id is not None and isinstance(
Overuse may indicate design issues; consider polymorphism
L347
isinstance-overuse
if isinstance(response, list):
Code quality findings 3

Get this view in your editor