Ensure functions have docstrings for documentation
def from_model_id(
1from __future__ import annotations # type: ignore[import-not-found]23import importlib.util4import logging5from collections.abc import Iterator, Mapping6from typing import Any78from langchain_core.callbacks import CallbackManagerForLLMRun9from langchain_core.language_models.llms import BaseLLM10from langchain_core.outputs import Generation, GenerationChunk, LLMResult11from pydantic import ConfigDict, model_validator1213from langchain_huggingface.utils.import_utils import (14 IMPORT_ERROR,15 is_ipex_available,16 is_openvino_available,17 is_optimum_intel_available,18 is_optimum_intel_version,19)2021DEFAULT_MODEL_ID = "gpt2"22DEFAULT_TASK = "text-generation"23VALID_TASKS = (24 "text2text-generation",25 "text-generation",26 "image-text-to-text",27 "summarization",28 "translation",29)30DEFAULT_BATCH_SIZE = 431_MIN_OPTIMUM_VERSION = "1.21"323334logger = logging.getLogger(__name__)353637class HuggingFacePipeline(BaseLLM):38 """HuggingFace Pipeline API.3940 To use, you should have the `transformers` python package installed.4142 Only supports `text-generation`, `text2text-generation`, `image-text-to-text`,43 `summarization` and `translation` for now.4445 Example using from_model_id:46 ```python47 from langchain_huggingface import HuggingFacePipeline4849 hf = HuggingFacePipeline.from_model_id(50 model_id="gpt2",51 task="text-generation",52 pipeline_kwargs={"max_new_tokens": 10},53 )54 ```5556 Example passing pipeline in directly:57 ```python58 from langchain_huggingface import HuggingFacePipeline59 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline6061 model_id = "gpt2"62 tokenizer = AutoTokenizer.from_pretrained(model_id)63 model = AutoModelForCausalLM.from_pretrained(model_id)64 pipe = pipeline(65 "text-generation",66 model=model,67 tokenizer=tokenizer,68 max_new_tokens=10,69 )70 hf = HuggingFacePipeline(pipeline=pipe)71 ```72 """7374 pipeline: Any = None7576 model_id: str | None = None77 """The model name. If not set explicitly by the user,78 it will be inferred from the provided pipeline (if available).79 If neither is provided, the DEFAULT_MODEL_ID will be used."""8081 model_kwargs: dict | None = None82 """Keyword arguments passed to the model."""8384 pipeline_kwargs: dict | None = None85 """Keyword arguments passed to the pipeline."""8687 batch_size: int = DEFAULT_BATCH_SIZE88 """Batch size to use when passing multiple documents to generate."""8990 model_config = ConfigDict(91 extra="forbid",92 )9394 @model_validator(mode="before")95 @classmethod96 def pre_init_validator(cls, values: dict[str, Any]) -> dict[str, Any]:97 """Ensure model_id is set either by pipeline or user input."""98 if "model_id" not in values:99 if values.get("pipeline"):100 values["model_id"] = values["pipeline"].model.name_or_path101 else:102 values["model_id"] = DEFAULT_MODEL_ID103 return values104105 @classmethod106 def from_model_id(107 cls,108 model_id: str,109 task: str,110 backend: str = "default",111 device: int | None = None,112 device_map: str | None = None,113 model_kwargs: dict | None = None,114 pipeline_kwargs: dict | None = None,115 batch_size: int = DEFAULT_BATCH_SIZE,116 **kwargs: Any,117 ) -> HuggingFacePipeline:118 """Construct the pipeline object from model_id and task."""119 try:120 from transformers import ( # type: ignore[import]121 AutoModelForCausalLM,122 AutoModelForSeq2SeqLM,123 AutoTokenizer,124 )125 from transformers import pipeline as hf_pipeline # type: ignore[import]126127 except ImportError as e:128 msg = (129 "Could not import transformers python package. "130 "Please install it with `pip install transformers`."131 )132 raise ValueError(msg) from e133134 _model_kwargs = model_kwargs.copy() if model_kwargs else {}135 if device_map is not None:136 if device is not None:137 msg = (138 "Both `device` and `device_map` are specified. "139 "`device` will override `device_map`. "140 "You will most likely encounter unexpected behavior."141 "Please remove `device` and keep "142 "`device_map`."143 )144 raise ValueError(msg)145146 if "device_map" in _model_kwargs:147 msg = "`device_map` is already specified in `model_kwargs`."148 raise ValueError(msg)149150 _model_kwargs["device_map"] = device_map151 tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)152153 if backend in {"openvino", "ipex"}:154 if task not in VALID_TASKS:155 msg = (156 f"Got invalid task {task}, "157 f"currently only {VALID_TASKS} are supported"158 )159 raise ValueError(msg)160161 err_msg = f"Backend: {backend} {IMPORT_ERROR.format(f'optimum[{backend}]')}"162 if not is_optimum_intel_available():163 raise ImportError(err_msg)164165 # TODO: upgrade _MIN_OPTIMUM_VERSION to 1.22 after release166 min_optimum_version = (167 "1.22"168 if backend == "ipex" and task != "text-generation"169 else _MIN_OPTIMUM_VERSION170 )171 if is_optimum_intel_version("<", min_optimum_version):172 msg = (173 f"Backend: {backend} requires optimum-intel>="174 f"{min_optimum_version}. You can install it with pip: "175 "`pip install --upgrade --upgrade-strategy eager "176 f"`optimum[{backend}]`."177 )178 raise ImportError(msg)179180 if backend == "openvino":181 if not is_openvino_available():182 raise ImportError(err_msg)183184 from optimum.intel import ( # type: ignore[import]185 OVModelForCausalLM,186 OVModelForSeq2SeqLM,187 )188189 model_cls = (190 OVModelForCausalLM191 if task == "text-generation"192 else OVModelForSeq2SeqLM193 )194 else:195 if not is_ipex_available():196 raise ImportError(err_msg)197198 if task == "text-generation":199 from optimum.intel import (200 IPEXModelForCausalLM, # type: ignore[import]201 )202203 model_cls = IPEXModelForCausalLM204 else:205 from optimum.intel import (206 IPEXModelForSeq2SeqLM, # type: ignore[import]207 )208209 model_cls = IPEXModelForSeq2SeqLM210211 else:212 model_cls = (213 AutoModelForCausalLM214 if task == "text-generation"215 else AutoModelForSeq2SeqLM216 )217218 model = model_cls.from_pretrained(model_id, **_model_kwargs)219220 if tokenizer.pad_token is None:221 if model.config.pad_token_id is not None:222 tokenizer.pad_token_id = model.config.pad_token_id223 elif model.config.eos_token_id is not None and isinstance(224 model.config.eos_token_id, int225 ):226 tokenizer.pad_token_id = model.config.eos_token_id227 elif tokenizer.eos_token_id is not None:228 tokenizer.pad_token_id = tokenizer.eos_token_id229 else:230 tokenizer.add_special_tokens({"pad_token": "[PAD]"})231232 if (233 (234 getattr(model, "is_loaded_in_4bit", False)235 or getattr(model, "is_loaded_in_8bit", False)236 )237 and device is not None238 and backend == "default"239 ):240 logger.warning(241 f"Setting the `device` argument to None from {device} to avoid "242 "the error caused by attempting to move the model that was already "243 "loaded on the GPU using the Accelerate module to the same or "244 "another device."245 )246 device = None247248 if (249 device is not None250 and importlib.util.find_spec("torch") is not None251 and backend == "default"252 ):253 import torch254255 cuda_device_count = torch.cuda.device_count()256 if device < -1 or (device >= cuda_device_count):257 msg = (258 f"Got device=={device}, "259 f"device is required to be within [-1, {cuda_device_count})"260 )261 raise ValueError(msg)262 if device_map is not None and device < 0:263 device = None264 if device is not None and device < 0 and cuda_device_count > 0:265 logger.warning(266 "Device has %d GPUs available. "267 "Provide device={deviceId} to `from_model_id` to use available"268 "GPUs for execution. deviceId is -1 (default) for CPU and "269 "can be a positive integer associated with CUDA device id.",270 cuda_device_count,271 )272 if device is not None and device_map is not None and backend == "openvino":273 logger.warning("Please set device for OpenVINO through: `model_kwargs`")274 if "trust_remote_code" in _model_kwargs:275 _model_kwargs = {276 k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"277 }278 _pipeline_kwargs = pipeline_kwargs or {}279 pipeline = hf_pipeline( # type: ignore[call-overload]280 task=task,281 model=model,282 tokenizer=tokenizer,283 device=device,284 batch_size=batch_size,285 model_kwargs=_model_kwargs,286 **_pipeline_kwargs,287 )288 if pipeline.task not in VALID_TASKS:289 msg = (290 f"Got invalid task {pipeline.task}, "291 f"currently only {VALID_TASKS} are supported"292 )293 raise ValueError(msg)294 return cls(295 pipeline=pipeline,296 model_id=model_id,297 model_kwargs=_model_kwargs,298 pipeline_kwargs=_pipeline_kwargs,299 batch_size=batch_size,300 **kwargs,301 )302303 @property304 def _identifying_params(self) -> Mapping[str, Any]:305 """Get the identifying parameters."""306 return {307 "model_id": self.model_id,308 "model_kwargs": self.model_kwargs,309 "pipeline_kwargs": self.pipeline_kwargs,310 }311312 @property313 def _llm_type(self) -> str:314 return "huggingface_pipeline"315316 def _generate(317 self,318 prompts: list[str],319 stop: list[str] | None = None,320 run_manager: CallbackManagerForLLMRun | None = None,321 **kwargs: Any,322 ) -> LLMResult:323 # List to hold all results324 text_generations: list[str] = []325 pipeline_kwargs = kwargs.get("pipeline_kwargs", {})326 skip_prompt = kwargs.get("skip_prompt", False)327328 for i in range(0, len(prompts), self.batch_size):329 batch_prompts = prompts[i : i + self.batch_size]330331 # Process batch of prompts332 responses = self.pipeline(333 batch_prompts,334 **pipeline_kwargs,335 )336337 # Process each response in the batch338 for j, response in enumerate(responses):339 if isinstance(response, list):340 # if model returns multiple generations, pick the top one341 response = response[0]342343 if (344 self.pipeline.task == "text-generation"345 or self.pipeline.task == "text2text-generation"346 or self.pipeline.task == "image-text-to-text"347 ):348 text = response["generated_text"]349 elif self.pipeline.task == "summarization":350 text = response["summary_text"]351 elif self.pipeline.task in "translation":352 text = response["translation_text"]353 else:354 msg = (355 f"Got invalid task {self.pipeline.task}, "356 f"currently only {VALID_TASKS} are supported"357 )358 raise ValueError(msg)359 if skip_prompt:360 text = text[len(batch_prompts[j]) :]361 # Append the processed text to results362 text_generations.append(text)363364 return LLMResult(365 generations=[[Generation(text=text)] for text in text_generations]366 )367368 def _stream(369 self,370 prompt: str,371 stop: list[str] | None = None,372 run_manager: CallbackManagerForLLMRun | None = None,373 **kwargs: Any,374 ) -> Iterator[GenerationChunk]:375 from threading import Thread376377 import torch378 from transformers import (379 StoppingCriteria,380 StoppingCriteriaList,381 TextIteratorStreamer,382 )383384 pipeline_kwargs = kwargs.get("pipeline_kwargs", {})385 skip_prompt = kwargs.get("skip_prompt", True)386387 if stop is not None:388 stop = self.pipeline.tokenizer.convert_tokens_to_ids(stop)389 stopping_ids_list = stop or []390391 class StopOnTokens(StoppingCriteria):392 def __call__(393 self,394 input_ids: torch.LongTensor,395 scores: torch.FloatTensor,396 **kwargs: Any,397 ) -> bool:398 return any(input_ids[0][-1] == stop_id for stop_id in stopping_ids_list)399400 stopping_criteria = StoppingCriteriaList([StopOnTokens()])401402 streamer = TextIteratorStreamer(403 self.pipeline.tokenizer,404 timeout=60.0,405 skip_prompt=skip_prompt,406 skip_special_tokens=True,407 )408 generation_kwargs = dict(409 text_inputs=prompt,410 streamer=streamer,411 stopping_criteria=stopping_criteria,412 **pipeline_kwargs,413 )414 t1 = Thread(target=self.pipeline, kwargs=generation_kwargs)415 t1.start()416417 for char in streamer:418 chunk = GenerationChunk(text=char)419 if run_manager:420 run_manager.on_llm_new_token(chunk.text, chunk=chunk)421422 yield chunk
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.