# Ensure functions have docstrings for documentation.
"""Base interface for traditional large language models (LLMs) to expose.

These are traditionally older models (newer models generally are chat models).
"""

from __future__ import annotations

import asyncio
import builtins  # noqa: TC003
import functools
import inspect
import json
import logging
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator, Callable, Iterator, Sequence
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    Any,
    cast,
)

import yaml
from pydantic import ConfigDict
from tenacity import (
    RetryCallState,
    before_sleep_log,
    retry,
    retry_base,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)
from typing_extensions import override

from langchain_core._api import deprecated, suppress_langchain_deprecation_warning
from langchain_core.caches import BaseCache
from langchain_core.callbacks import (
    AsyncCallbackManager,
    AsyncCallbackManagerForLLMRun,
    BaseCallbackManager,
    CallbackManager,
    CallbackManagerForLLMRun,
    Callbacks,
)
from langchain_core.globals import get_llm_cache
from langchain_core.language_models._utils import _filter_invocation_params_for_tracing
from langchain_core.language_models.base import (
    BaseLanguageModel,
    LangSmithParams,
    LanguageModelInput,
)
from langchain_core.load import dumpd
from langchain_core.messages import (
    convert_to_messages,
)
from langchain_core.outputs import Generation, GenerationChunk, LLMResult, RunInfo
from langchain_core.prompt_values import ChatPromptValue, PromptValue, StringPromptValue
from langchain_core.runnables import RunnableConfig, ensure_config, get_config_list
from langchain_core.runnables.config import run_in_executor

if TYPE_CHECKING:
    import uuid

logger = logging.getLogger(__name__)

# Strong references to fire-and-forget `on_retry` tasks so they are not
# garbage-collected before they finish; each task removes itself when done.
_background_tasks: set[asyncio.Task] = set()


@functools.lru_cache
def _log_error_once(msg: str) -> None:
    """Log an error once.

    Calls are memoized by `functools.lru_cache`, so an identical message is
    only sent to the logger again after it has been evicted from that cache.
    """
    logger.error(msg)


def create_base_retry_decorator(
    error_types: list[type[BaseException]],
    max_retries: int = 1,
    run_manager: AsyncCallbackManagerForLLMRun | CallbackManagerForLLMRun | None = None,
) -> Callable[[Any], Any]:
    """Create a retry decorator for a given LLM and provided a list of error types.

    Args:
        error_types: List of error types to retry on. An empty list yields a
            decorator that never retries.
        max_retries: Value handed to tenacity's `stop_after_attempt`, i.e. the
            maximum number of attempts.
        run_manager: Callback manager for the run. When given, its `on_retry`
            hook is invoked before each sleep.

    Returns:
        A retry decorator.
    """
    logging_ = before_sleep_log(logger, logging.WARNING)

    def _before_sleep(retry_state: RetryCallState) -> None:
        logging_(retry_state)
        if not run_manager:
            return
        if not isinstance(run_manager, AsyncCallbackManagerForLLMRun):
            run_manager.on_retry(retry_state)
            return

        # Async manager: `on_retry` returns a coroutine that must be driven.
        coro = run_manager.on_retry(retry_state)
        try:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # No loop is running in this thread: run the hook to completion.
                asyncio.run(coro)
            else:
                # Already inside a running loop: schedule without blocking it.
                task = loop.create_task(coro)
                _background_tasks.add(task)
                task.add_done_callback(_background_tasks.discard)
        except Exception as e:
            # A failing callback must not break the retry loop itself.
            _log_error_once(f"Error in on_retry: {e}")

    min_seconds = 4
    max_seconds = 10
    # Wait 2^x * 1 second between each retry starting with
    # 4 seconds, then up to 10 seconds, then 10 seconds afterwards
    retry_instance: retry_base = retry_if_exception_type(tuple(error_types))
    return retry(
        reraise=True,
        stop=stop_after_attempt(max_retries),
        wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds),
        retry=retry_instance,
        before_sleep=_before_sleep,
    )
def _resolve_cache(*, cache: BaseCache | bool | None) -> BaseCache | None:
    """Resolve the `cache` argument to a cache object, or `None` for no caching.

    Args:
        cache: A `BaseCache` instance is used as-is. `None` falls back to the
            global cache (which may itself be unset). `True` requires the
            global cache to be set. `False` disables caching.

    Returns:
        The cache to use, or `None` when caching is disabled or unavailable.

    Raises:
        ValueError: If `cache` is `True` but no global cache is configured, or
            if `cache` is not one of the supported values.
    """
    if isinstance(cache, BaseCache):
        return cache
    if cache is None:
        return get_llm_cache()
    if cache is True:
        llm_cache = get_llm_cache()
        if llm_cache is None:
            msg = (
                "No global cache was configured. Use `set_llm_cache` "
                "to set a global cache if you want to use a global cache. "
                "Otherwise either pass a cache object or set cache to False/None"
            )
            raise ValueError(msg)
        return llm_cache
    if cache is False:
        return None
    msg = f"Unsupported cache value {cache}"
    raise ValueError(msg)


def get_prompts(
    params: dict[str, Any],
    prompts: list[str],
    cache: BaseCache | bool | None = None,  # noqa: FBT001
) -> tuple[dict[int, list], str, list[int], list[str]]:
    """Get prompts that are already cached.

    Args:
        params: Dictionary of parameters. Its sorted items are stringified to
            form the cache key component `llm_string`.
        prompts: List of prompts.
        cache: Cache object, or a flag resolved by `_resolve_cache`.

    Returns:
        A tuple of existing prompts (keyed by prompt index), llm_string,
        missing prompt indexes, and missing prompts. A prompt counts as missing
        when no cache is available or the lookup does not return a list.

    Raises:
        ValueError: If the cache is not set and cache is True.
    """
    llm_string = str(sorted(params.items()))
    missing_prompts: list[str] = []
    missing_prompt_idxs: list[int] = []
    existing_prompts: dict[int, list] = {}

    llm_cache = _resolve_cache(cache=cache)
    for i, prompt in enumerate(prompts):
        # Compare against None explicitly: a cache object that defines
        # __len__/__bool__ must not be mistaken for "no cache" when empty.
        cache_val = (
            llm_cache.lookup(prompt, llm_string) if llm_cache is not None else None
        )
        if isinstance(cache_val, list):
            existing_prompts[i] = cache_val
        else:
            missing_prompts.append(prompt)
            missing_prompt_idxs.append(i)
    return existing_prompts, llm_string, missing_prompt_idxs, missing_prompts
async def aget_prompts(
    params: dict[str, Any],
    prompts: list[str],
    cache: BaseCache | bool | None = None,  # noqa: FBT001
) -> tuple[dict[int, list], str, list[int], list[str]]:
    """Get prompts that are already cached. Async version.

    Args:
        params: Dictionary of parameters. Its sorted items are stringified to
            form the cache key component `llm_string`.
        prompts: List of prompts.
        cache: Cache object, or a flag resolved by `_resolve_cache`.

    Returns:
        A tuple of existing prompts (keyed by prompt index), llm_string,
        missing prompt indexes, and missing prompts. A prompt counts as missing
        when no cache is available or the lookup does not return a list.

    Raises:
        ValueError: If the cache is not set and cache is True.
    """
    llm_string = str(sorted(params.items()))
    missing_prompts: list[str] = []
    missing_prompt_idxs: list[int] = []
    existing_prompts: dict[int, list] = {}

    llm_cache = _resolve_cache(cache=cache)
    for i, prompt in enumerate(prompts):
        # Explicit None check so an empty-but-valid cache object is still used.
        cache_val = (
            await llm_cache.alookup(prompt, llm_string)
            if llm_cache is not None
            else None
        )
        if isinstance(cache_val, list):
            existing_prompts[i] = cache_val
        else:
            missing_prompts.append(prompt)
            missing_prompt_idxs.append(i)
    return existing_prompts, llm_string, missing_prompt_idxs, missing_prompts


def update_cache(
    cache: BaseCache | bool | None,  # noqa: FBT001
    existing_prompts: dict[int, list],
    llm_string: str,
    missing_prompt_idxs: list[int],
    new_results: LLMResult,
    prompts: list[str],
) -> dict | None:
    """Update the cache and get the LLM output.

    Args:
        cache: Cache object, or a flag resolved by `_resolve_cache`.
        existing_prompts: Dictionary of existing prompts. Mutated in place:
            each new generation list is stored under its original prompt index.
        llm_string: LLM string.
        missing_prompt_idxs: List of missing prompt indexes; entry `i` is the
            prompt index that `new_results.generations[i]` belongs to.
        new_results: LLMResult object.
        prompts: List of prompts.

    Returns:
        LLM output.

    Raises:
        ValueError: If the cache is not set and cache is True.
    """
    llm_cache = _resolve_cache(cache=cache)
    for i, result in enumerate(new_results.generations):
        prompt_idx = missing_prompt_idxs[i]
        existing_prompts[prompt_idx] = result
        if llm_cache is not None:
            llm_cache.update(prompts[prompt_idx], llm_string, result)
    return new_results.llm_output
async def aupdate_cache(
    cache: BaseCache | bool | None,  # noqa: FBT001
    existing_prompts: dict[int, list],
    llm_string: str,
    missing_prompt_idxs: list[int],
    new_results: LLMResult,
    prompts: list[str],
) -> dict | None:
    """Update the cache and get the LLM output. Async version.

    Args:
        cache: Cache object, or a flag resolved by `_resolve_cache`.
        existing_prompts: Dictionary of existing prompts. Mutated in place:
            each new generation list is stored under its original prompt index.
        llm_string: LLM string.
        missing_prompt_idxs: List of missing prompt indexes; entry `i` is the
            prompt index that `new_results.generations[i]` belongs to.
        new_results: LLMResult object.
        prompts: List of prompts.

    Returns:
        LLM output.

    Raises:
        ValueError: If the cache is not set and cache is True.
    """
    llm_cache = _resolve_cache(cache=cache)
    for i, result in enumerate(new_results.generations):
        prompt_idx = missing_prompt_idxs[i]
        existing_prompts[prompt_idx] = result
        # Explicit None check, matching the synchronous `update_cache`.
        if llm_cache is not None:
            await llm_cache.aupdate(prompts[prompt_idx], llm_string, result)
    return new_results.llm_output


class BaseLLM(BaseLanguageModel[str], ABC):
    """Base LLM abstract interface.

    It should take in a prompt and return a string.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
    )

    @functools.cached_property
    def _serialized(self) -> builtins.dict[str, Any]:
        """Return the `dumpd` serialization of this model, computed once."""
        # self is always a Serializable object in this case, thus the result is
        # guaranteed to be a dict since dumpd uses the default callback, which uses
        # obj.to_json which always returns TypedDict subclasses
        return cast("builtins.dict[str, Any]", dumpd(self))

    # --- Runnable methods ---

    @property
    @override
    def OutputType(self) -> type[str]:
        """Get the output type for this `Runnable`."""
        return str
"329 "Must be a PromptValue, str, or list of BaseMessages."330 )331 raise ValueError(msg)332333 def _get_ls_params(334 self,335 stop: list[str] | None = None,336 **kwargs: Any,337 ) -> LangSmithParams:338 """Get standard params for tracing."""339 # get default provider from class name340 default_provider = self.__class__.__name__341 default_provider = default_provider.removesuffix("LLM")342 default_provider = default_provider.lower()343344 ls_params = LangSmithParams(ls_provider=default_provider, ls_model_type="llm")345 if stop:346 ls_params["ls_stop"] = stop347348 # model349 if "model" in kwargs and isinstance(kwargs["model"], str):350 ls_params["ls_model_name"] = kwargs["model"]351 elif hasattr(self, "model") and isinstance(self.model, str):352 ls_params["ls_model_name"] = self.model353 elif hasattr(self, "model_name") and isinstance(self.model_name, str):354 ls_params["ls_model_name"] = self.model_name355356 # temperature357 if "temperature" in kwargs and isinstance(kwargs["temperature"], (int, float)):358 ls_params["ls_temperature"] = kwargs["temperature"]359 elif hasattr(self, "temperature") and isinstance(360 self.temperature, (int, float)361 ):362 ls_params["ls_temperature"] = self.temperature363364 # max_tokens365 if "max_tokens" in kwargs and isinstance(kwargs["max_tokens"], int):366 ls_params["ls_max_tokens"] = kwargs["max_tokens"]367 elif hasattr(self, "max_tokens") and isinstance(self.max_tokens, int):368 ls_params["ls_max_tokens"] = self.max_tokens369370 return ls_params371372 @override373 def invoke(374 self,375 input: LanguageModelInput,376 config: RunnableConfig | None = None,377 *,378 stop: list[str] | None = None,379 **kwargs: Any,380 ) -> str:381 config = ensure_config(config)382 return (383 self.generate_prompt(384 [self._convert_input(input)],385 stop=stop,386 callbacks=config.get("callbacks"),387 tags=config.get("tags"),388 metadata=config.get("metadata"),389 run_name=config.get("run_name"),390 run_id=config.pop("run_id", None),391 
**kwargs,392 )393 .generations[0][0]394 .text395 )396397 @override398 async def ainvoke(399 self,400 input: LanguageModelInput,401 config: RunnableConfig | None = None,402 *,403 stop: list[str] | None = None,404 **kwargs: Any,405 ) -> str:406 config = ensure_config(config)407 llm_result = await self.agenerate_prompt(408 [self._convert_input(input)],409 stop=stop,410 callbacks=config.get("callbacks"),411 tags=config.get("tags"),412 metadata=config.get("metadata"),413 run_name=config.get("run_name"),414 run_id=config.pop("run_id", None),415 **kwargs,416 )417 return llm_result.generations[0][0].text418419 @override420 def batch(421 self,422 inputs: list[LanguageModelInput],423 config: RunnableConfig | list[RunnableConfig] | None = None,424 *,425 return_exceptions: bool = False,426 **kwargs: Any,427 ) -> list[str]:428 if not inputs:429 return []430431 config = get_config_list(config, len(inputs))432 max_concurrency = config[0].get("max_concurrency")433434 if max_concurrency is None:435 try:436 llm_result = self.generate_prompt(437 [self._convert_input(input_) for input_ in inputs],438 callbacks=[c.get("callbacks") for c in config],439 tags=[c.get("tags") for c in config],440 metadata=[c.get("metadata") for c in config],441 run_name=[c.get("run_name") for c in config],442 **kwargs,443 )444 return [g[0].text for g in llm_result.generations]445 except Exception as e:446 if return_exceptions:447 return cast("list[str]", [e for _ in inputs])448 raise449 else:450 batches = [451 inputs[i : i + max_concurrency]452 for i in range(0, len(inputs), max_concurrency)453 ]454 config = [{**c, "max_concurrency": None} for c in config]455 return [456 output457 for i, batch in enumerate(batches)458 for output in self.batch(459 batch,460 config=config[i * max_concurrency : (i + 1) * max_concurrency],461 return_exceptions=return_exceptions,462 **kwargs,463 )464 ]465466 @override467 async def abatch(468 self,469 inputs: list[LanguageModelInput],470 config: RunnableConfig | 
list[RunnableConfig] | None = None,471 *,472 return_exceptions: bool = False,473 **kwargs: Any,474 ) -> list[str]:475 if not inputs:476 return []477 config = get_config_list(config, len(inputs))478 max_concurrency = config[0].get("max_concurrency")479480 if max_concurrency is None:481 try:482 llm_result = await self.agenerate_prompt(483 [self._convert_input(input_) for input_ in inputs],484 callbacks=[c.get("callbacks") for c in config],485 tags=[c.get("tags") for c in config],486 metadata=[c.get("metadata") for c in config],487 run_name=[c.get("run_name") for c in config],488 **kwargs,489 )490 return [g[0].text for g in llm_result.generations]491 except Exception as e:492 if return_exceptions:493 return cast("list[str]", [e for _ in inputs])494 raise495 else:496 batches = [497 inputs[i : i + max_concurrency]498 for i in range(0, len(inputs), max_concurrency)499 ]500 config = [{**c, "max_concurrency": None} for c in config]501 return [502 output503 for i, batch in enumerate(batches)504 for output in await self.abatch(505 batch,506 config=config[i * max_concurrency : (i + 1) * max_concurrency],507 return_exceptions=return_exceptions,508 **kwargs,509 )510 ]511512 @override513 def stream(514 self,515 input: LanguageModelInput,516 config: RunnableConfig | None = None,517 *,518 stop: list[str] | None = None,519 **kwargs: Any,520 ) -> Iterator[str]:521 if type(self)._stream == BaseLLM._stream: # noqa: SLF001522 # model doesn't implement streaming, so use default implementation523 yield self.invoke(input, config=config, stop=stop, **kwargs)524 else:525 prompt = self._convert_input(input).to_string()526 config = ensure_config(config)527 params = self._dict_for_compat()528 params["stop"] = stop529 params = {**params, **kwargs}530 options = {"stop": stop}531 inheritable_metadata = {532 **(config.get("metadata") or {}),533 **self._get_ls_params_with_defaults(stop=stop, **kwargs),534 }535 callback_manager = CallbackManager.configure(536 config.get("callbacks"),537 
self.callbacks,538 self.verbose,539 config.get("tags"),540 self.tags,541 inheritable_metadata,542 self.metadata,543 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(544 params545 ),546 )547 (run_manager,) = callback_manager.on_llm_start(548 self._serialized,549 [prompt],550 invocation_params=params,551 options=options,552 name=config.get("run_name"),553 run_id=config.pop("run_id", None),554 batch_size=1,555 )556 generation: GenerationChunk | None = None557 try:558 for chunk in self._stream(559 prompt, stop=stop, run_manager=run_manager, **kwargs560 ):561 yield chunk.text562 if generation is None:563 generation = chunk564 else:565 generation += chunk566 except BaseException as e:567 run_manager.on_llm_error(568 e,569 response=LLMResult(570 generations=[[generation]] if generation else []571 ),572 )573 raise574575 if generation is None:576 err = ValueError("No generation chunks were returned")577 run_manager.on_llm_error(err, response=LLMResult(generations=[]))578 raise err579580 run_manager.on_llm_end(LLMResult(generations=[[generation]]))581582 @override583 async def astream(584 self,585 input: LanguageModelInput,586 config: RunnableConfig | None = None,587 *,588 stop: list[str] | None = None,589 **kwargs: Any,590 ) -> AsyncIterator[str]:591 if (592 type(self)._astream is BaseLLM._astream # noqa: SLF001593 and type(self)._stream is BaseLLM._stream # noqa: SLF001594 ):595 yield await self.ainvoke(input, config=config, stop=stop, **kwargs)596 return597598 prompt = self._convert_input(input).to_string()599 config = ensure_config(config)600 params = self._dict_for_compat()601 params["stop"] = stop602 params = {**params, **kwargs}603 options = {"stop": stop}604 inheritable_metadata = {605 **(config.get("metadata") or {}),606 **self._get_ls_params_with_defaults(stop=stop, **kwargs),607 }608 callback_manager = AsyncCallbackManager.configure(609 config.get("callbacks"),610 self.callbacks,611 self.verbose,612 config.get("tags"),613 self.tags,614 
inheritable_metadata,615 self.metadata,616 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(617 params618 ),619 )620 (run_manager,) = await callback_manager.on_llm_start(621 self._serialized,622 [prompt],623 invocation_params=params,624 options=options,625 name=config.get("run_name"),626 run_id=config.pop("run_id", None),627 batch_size=1,628 )629 generation: GenerationChunk | None = None630 try:631 async for chunk in self._astream(632 prompt,633 stop=stop,634 run_manager=run_manager,635 **kwargs,636 ):637 yield chunk.text638 if generation is None:639 generation = chunk640 else:641 generation += chunk642 except BaseException as e:643 await run_manager.on_llm_error(644 e,645 response=LLMResult(generations=[[generation]] if generation else []),646 )647 raise648649 if generation is None:650 err = ValueError("No generation chunks were returned")651 await run_manager.on_llm_error(err, response=LLMResult(generations=[]))652 raise err653654 await run_manager.on_llm_end(LLMResult(generations=[[generation]]))655656 # --- Custom methods ---657658 @abstractmethod659 def _generate(660 self,661 prompts: list[str],662 stop: list[str] | None = None,663 run_manager: CallbackManagerForLLMRun | None = None,664 **kwargs: Any,665 ) -> LLMResult:666 """Run the LLM on the given prompts.667668 Args:669 prompts: The prompts to generate from.670 stop: Stop words to use when generating.671672 Model output is cut off at the first occurrence of any of these673 substrings.674675 If stop tokens are not supported consider raising `NotImplementedError`.676 run_manager: Callback manager for the run.677678 Returns:679 The LLM result.680 """681682 async def _agenerate(683 self,684 prompts: list[str],685 stop: list[str] | None = None,686 run_manager: AsyncCallbackManagerForLLMRun | None = None,687 **kwargs: Any,688 ) -> LLMResult:689 """Run the LLM on the given prompts.690691 Args:692 prompts: The prompts to generate from.693 stop: Stop words to use when generating.694695 Model 
output is cut off at the first occurrence of any of these696 substrings.697698 If stop tokens are not supported consider raising `NotImplementedError`.699 run_manager: Callback manager for the run.700701 Returns:702 The LLM result.703 """704 return await run_in_executor(705 None,706 self._generate,707 prompts,708 stop,709 run_manager.get_sync() if run_manager else None,710 **kwargs,711 )712713 def _stream(714 self,715 prompt: str,716 stop: list[str] | None = None,717 run_manager: CallbackManagerForLLMRun | None = None,718 **kwargs: Any,719 ) -> Iterator[GenerationChunk]:720 """Stream the LLM on the given prompt.721722 This method should be overridden by subclasses that support streaming.723724 If not implemented, the default behavior of calls to stream will be to725 fallback to the non-streaming version of the model and return726 the output as a single chunk.727728 Args:729 prompt: The prompt to generate from.730 stop: Stop words to use when generating.731732 Model output is cut off at the first occurrence of any of these733 substrings.734 run_manager: Callback manager for the run.735 **kwargs: Arbitrary additional keyword arguments.736737 These are usually passed to the model provider API call.738739 Yields:740 Generation chunks.741 """742 raise NotImplementedError743744 async def _astream(745 self,746 prompt: str,747 stop: list[str] | None = None,748 run_manager: AsyncCallbackManagerForLLMRun | None = None,749 **kwargs: Any,750 ) -> AsyncIterator[GenerationChunk]:751 """An async version of the _stream method.752753 The default implementation uses the synchronous _stream method and wraps it in754 an async iterator. 
Subclasses that need to provide a true async implementation755 should override this method.756757 Args:758 prompt: The prompt to generate from.759 stop: Stop words to use when generating.760761 Model output is cut off at the first occurrence of any of these762 substrings.763 run_manager: Callback manager for the run.764 **kwargs: Arbitrary additional keyword arguments.765766 These are usually passed to the model provider API call.767768 Yields:769 Generation chunks.770 """771 iterator = await run_in_executor(772 None,773 self._stream,774 prompt,775 stop,776 run_manager.get_sync() if run_manager else None,777 **kwargs,778 )779 done = object()780 while True:781 item = await run_in_executor(782 None,783 next,784 iterator,785 done,786 )787 if item is done:788 break789 yield item # type: ignore[misc]790791 @override792 def generate_prompt(793 self,794 prompts: list[PromptValue],795 stop: list[str] | None = None,796 callbacks: Callbacks | list[Callbacks] | None = None,797 **kwargs: Any,798 ) -> LLMResult:799 prompt_strings = [p.to_string() for p in prompts]800 return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)801802 @override803 async def agenerate_prompt(804 self,805 prompts: list[PromptValue],806 stop: list[str] | None = None,807 callbacks: Callbacks | list[Callbacks] | None = None,808 **kwargs: Any,809 ) -> LLMResult:810 prompt_strings = [p.to_string() for p in prompts]811 return await self.agenerate(812 prompt_strings, stop=stop, callbacks=callbacks, **kwargs813 )814815 def _generate_helper(816 self,817 prompts: list[str],818 stop: list[str] | None,819 run_managers: list[CallbackManagerForLLMRun],820 *,821 new_arg_supported: bool,822 **kwargs: Any,823 ) -> LLMResult:824 try:825 output = (826 self._generate(827 prompts,828 stop=stop,829 # TODO: support multiple run managers830 run_manager=run_managers[0] if run_managers else None,831 **kwargs,832 )833 if new_arg_supported834 else self._generate(prompts, stop=stop)835 )836 except 
BaseException as e:837 for run_manager in run_managers:838 run_manager.on_llm_error(e, response=LLMResult(generations=[]))839 raise840 flattened_outputs = output.flatten()841 for manager, flattened_output in zip(842 run_managers, flattened_outputs, strict=False843 ):844 manager.on_llm_end(flattened_output)845 if run_managers:846 output.run = [847 RunInfo(run_id=run_manager.run_id) for run_manager in run_managers848 ]849 return output850851 def generate(852 self,853 prompts: list[str],854 stop: list[str] | None = None,855 callbacks: Callbacks | list[Callbacks] | None = None,856 *,857 tags: list[str] | list[list[str]] | None = None,858 metadata: builtins.dict[str, Any] | list[builtins.dict[str, Any]] | None = None,859 run_name: str | list[str] | None = None,860 run_id: uuid.UUID | list[uuid.UUID | None] | None = None,861 **kwargs: Any,862 ) -> LLMResult:863 """Pass a sequence of prompts to a model and return generations.864865 This method should make use of batched calls for models that expose a batched866 API.867868 Use this method when you want to:869870 1. Take advantage of batched calls,871 2. Need more output from the model than just the top generated value,872 3. Are building chains that are agnostic to the underlying language model873 type (e.g., pure text completion models vs chat models).874875 Args:876 prompts: List of string prompts.877 stop: Stop words to use when generating.878879 Model output is cut off at the first occurrence of any of these880 substrings.881 callbacks: `Callbacks` to pass through.882883 Used for executing additional functionality, such as logging or884 streaming, throughout generation.885 tags: List of tags to associate with each prompt. If provided, the length886 of the list must match the length of the prompts list.887 metadata: List of metadata dictionaries to associate with each prompt. If888 provided, the length of the list must match the length of the prompts889 list.890 run_name: List of run names to associate with each prompt. 
If provided, the891 length of the list must match the length of the prompts list.892 run_id: List of run IDs to associate with each prompt. If provided, the893 length of the list must match the length of the prompts list.894 **kwargs: Arbitrary additional keyword arguments.895896 These are usually passed to the model provider API call.897898 Raises:899 ValueError: If prompts is not a list.900 ValueError: If the length of `callbacks`, `tags`, `metadata`, or901 `run_name` (if provided) does not match the length of prompts.902903 Returns:904 An `LLMResult`, which contains a list of candidate `Generations` for each905 input prompt and additional model provider-specific output.906 """907 if not isinstance(prompts, list):908 msg = (909 "Argument 'prompts' is expected to be of type list[str], received"910 f" argument of type {type(prompts)}."911 )912 raise ValueError(msg) # noqa: TRY004913 # Create callback managers914 if isinstance(metadata, list):915 metadata = [916 {917 **(meta or {}),918 **self._get_ls_params_with_defaults(stop=stop, **kwargs),919 }920 for meta in metadata921 ]922 elif isinstance(metadata, dict):923 metadata = {924 **(metadata or {}),925 **self._get_ls_params_with_defaults(stop=stop, **kwargs),926 }927 if (928 isinstance(callbacks, list)929 and callbacks930 and (931 isinstance(callbacks[0], (list, BaseCallbackManager))932 or callbacks[0] is None933 )934 ):935 # We've received a list of callbacks args to apply to each input936 if len(callbacks) != len(prompts):937 msg = "callbacks must be the same length as prompts"938 raise ValueError(msg)939 if tags is not None and not (940 isinstance(tags, list) and len(tags) == len(prompts)941 ):942 msg = "tags must be a list of the same length as prompts"943 raise ValueError(msg)944 if metadata is not None and not (945 isinstance(metadata, list) and len(metadata) == len(prompts)946 ):947 msg = "metadata must be a list of the same length as prompts"948 raise ValueError(msg)949 if run_name is not None and not (950 
isinstance(run_name, list) and len(run_name) == len(prompts)951 ):952 msg = "run_name must be a list of the same length as prompts"953 raise ValueError(msg)954 callbacks = cast("list[Callbacks]", callbacks)955 tags_list = cast("list[list[str] | None]", tags or ([None] * len(prompts)))956 metadata_list = cast(957 "list[dict[str, Any] | None]", metadata or ([{}] * len(prompts))958 )959 run_name_list = run_name or cast(960 "list[str | None]", ([None] * len(prompts))961 )962 params = self._dict_for_compat()963 params["stop"] = stop964 callback_managers = [965 CallbackManager.configure(966 callback,967 self.callbacks,968 self.verbose,969 tag,970 self.tags,971 meta,972 self.metadata,973 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(974 params975 ),976 )977 for callback, tag, meta in zip(978 callbacks, tags_list, metadata_list, strict=False979 )980 ]981 else:982 # We've received a single callbacks arg to apply to all inputs983 params = self._dict_for_compat()984 params["stop"] = stop985 callback_managers = [986 CallbackManager.configure(987 cast("Callbacks", callbacks),988 self.callbacks,989 self.verbose,990 cast("list[str]", tags),991 self.tags,992 cast("dict[str, Any]", metadata),993 self.metadata,994 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(995 params996 ),997 )998 ] * len(prompts)999 run_name_list = [cast("str | None", run_name)] * len(prompts)1000 run_ids_list = self._get_run_ids_list(run_id, prompts)1001 options = {"stop": stop}1002 (1003 existing_prompts,1004 llm_string,1005 missing_prompt_idxs,1006 missing_prompts,1007 ) = get_prompts(params, prompts, self.cache)1008 new_arg_supported = inspect.signature(self._generate).parameters.get(1009 "run_manager"1010 )1011 if (self.cache is None and get_llm_cache() is None) or self.cache is False:1012 run_managers = [1013 callback_manager.on_llm_start(1014 self._serialized,1015 [prompt],1016 invocation_params=params,1017 options=options,1018 name=run_name,1019 
batch_size=len(prompts),1020 run_id=run_id_,1021 )[0]1022 for callback_manager, prompt, run_name, run_id_ in zip(1023 callback_managers,1024 prompts,1025 run_name_list,1026 run_ids_list,1027 strict=False,1028 )1029 ]1030 return self._generate_helper(1031 prompts,1032 stop,1033 run_managers,1034 new_arg_supported=bool(new_arg_supported),1035 **kwargs,1036 )1037 if len(missing_prompts) > 0:1038 run_managers = [1039 callback_managers[idx].on_llm_start(1040 self._serialized,1041 [prompts[idx]],1042 invocation_params=params,1043 options=options,1044 name=run_name_list[idx],1045 batch_size=len(missing_prompts),1046 )[0]1047 for idx in missing_prompt_idxs1048 ]1049 new_results = self._generate_helper(1050 missing_prompts,1051 stop,1052 run_managers,1053 new_arg_supported=bool(new_arg_supported),1054 **kwargs,1055 )1056 llm_output = update_cache(1057 self.cache,1058 existing_prompts,1059 llm_string,1060 missing_prompt_idxs,1061 new_results,1062 prompts,1063 )1064 run_info = (1065 [RunInfo(run_id=run_manager.run_id) for run_manager in run_managers]1066 if run_managers1067 else None1068 )1069 else:1070 llm_output = {}1071 run_info = None1072 generations = [existing_prompts[i] for i in range(len(prompts))]1073 return LLMResult(generations=generations, llm_output=llm_output, run=run_info)10741075 @staticmethod1076 def _get_run_ids_list(1077 run_id: uuid.UUID | list[uuid.UUID | None] | None, prompts: list1078 ) -> list:1079 if run_id is None:1080 return [None] * len(prompts)1081 if isinstance(run_id, list):1082 if len(run_id) != len(prompts):1083 msg = (1084 "Number of manually provided run_id's does not match batch length."1085 f" {len(run_id)} != {len(prompts)}"1086 )1087 raise ValueError(msg)1088 return run_id1089 return [run_id] + [None] * (len(prompts) - 1)10901091 async def _agenerate_helper(1092 self,1093 prompts: list[str],1094 stop: list[str] | None,1095 run_managers: list[AsyncCallbackManagerForLLMRun],1096 *,1097 new_arg_supported: bool,1098 **kwargs: Any,1099 ) -> 
LLMResult:1100 try:1101 output = (1102 await self._agenerate(1103 prompts,1104 stop=stop,1105 run_manager=run_managers[0] if run_managers else None,1106 **kwargs,1107 )1108 if new_arg_supported1109 else await self._agenerate(prompts, stop=stop)1110 )1111 except BaseException as e:1112 await asyncio.gather(1113 *[1114 run_manager.on_llm_error(e, response=LLMResult(generations=[]))1115 for run_manager in run_managers1116 ]1117 )1118 raise1119 flattened_outputs = output.flatten()1120 await asyncio.gather(1121 *[1122 run_manager.on_llm_end(flattened_output)1123 for run_manager, flattened_output in zip(1124 run_managers, flattened_outputs, strict=False1125 )1126 ]1127 )1128 if run_managers:1129 output.run = [1130 RunInfo(run_id=run_manager.run_id) for run_manager in run_managers1131 ]1132 return output11331134 async def agenerate(1135 self,1136 prompts: list[str],1137 stop: list[str] | None = None,1138 callbacks: Callbacks | list[Callbacks] | None = None,1139 *,1140 tags: list[str] | list[list[str]] | None = None,1141 metadata: builtins.dict[str, Any] | list[builtins.dict[str, Any]] | None = None,1142 run_name: str | list[str] | None = None,1143 run_id: uuid.UUID | list[uuid.UUID | None] | None = None,1144 **kwargs: Any,1145 ) -> LLMResult:1146 """Asynchronously pass a sequence of prompts to a model and return generations.11471148 This method should make use of batched calls for models that expose a batched1149 API.11501151 Use this method when you want to:11521153 1. Take advantage of batched calls,1154 2. Need more output from the model than just the top generated value,1155 3. 
Are building chains that are agnostic to the underlying language model1156 type (e.g., pure text completion models vs chat models).11571158 Args:1159 prompts: List of string prompts.1160 stop: Stop words to use when generating.11611162 Model output is cut off at the first occurrence of any of these1163 substrings.1164 callbacks: `Callbacks` to pass through.11651166 Used for executing additional functionality, such as logging or1167 streaming, throughout generation.1168 tags: List of tags to associate with each prompt. If provided, the length1169 of the list must match the length of the prompts list.1170 metadata: List of metadata dictionaries to associate with each prompt. If1171 provided, the length of the list must match the length of the prompts1172 list.1173 run_name: List of run names to associate with each prompt. If provided, the1174 length of the list must match the length of the prompts list.1175 run_id: List of run IDs to associate with each prompt. If provided, the1176 length of the list must match the length of the prompts list.1177 **kwargs: Arbitrary additional keyword arguments.11781179 These are usually passed to the model provider API call.11801181 Raises:1182 ValueError: If the length of `callbacks`, `tags`, `metadata`, or1183 `run_name` (if provided) does not match the length of prompts.11841185 Returns:1186 An `LLMResult`, which contains a list of candidate `Generations` for each1187 input prompt and additional model provider-specific output.1188 """1189 if isinstance(metadata, list):1190 metadata = [1191 {1192 **(meta or {}),1193 **self._get_ls_params_with_defaults(stop=stop, **kwargs),1194 }1195 for meta in metadata1196 ]1197 elif isinstance(metadata, dict):1198 metadata = {1199 **(metadata or {}),1200 **self._get_ls_params_with_defaults(stop=stop, **kwargs),1201 }1202 # Create callback managers1203 if isinstance(callbacks, list) and (1204 isinstance(callbacks[0], (list, BaseCallbackManager))1205 or callbacks[0] is None1206 ):1207 # We've 
received a list of callbacks args to apply to each input1208 if len(callbacks) != len(prompts):1209 msg = "callbacks must be the same length as prompts"1210 raise ValueError(msg)1211 if tags is not None and not (1212 isinstance(tags, list) and len(tags) == len(prompts)1213 ):1214 msg = "tags must be a list of the same length as prompts"1215 raise ValueError(msg)1216 if metadata is not None and not (1217 isinstance(metadata, list) and len(metadata) == len(prompts)1218 ):1219 msg = "metadata must be a list of the same length as prompts"1220 raise ValueError(msg)1221 if run_name is not None and not (1222 isinstance(run_name, list) and len(run_name) == len(prompts)1223 ):1224 msg = "run_name must be a list of the same length as prompts"1225 raise ValueError(msg)1226 callbacks = cast("list[Callbacks]", callbacks)1227 tags_list = cast("list[list[str] | None]", tags or ([None] * len(prompts)))1228 metadata_list = cast(1229 "list[dict[str, Any] | None]", metadata or ([{}] * len(prompts))1230 )1231 run_name_list = run_name or cast(1232 "list[str | None]", ([None] * len(prompts))1233 )1234 params = self._dict_for_compat()1235 params["stop"] = stop1236 callback_managers = [1237 AsyncCallbackManager.configure(1238 callback,1239 self.callbacks,1240 self.verbose,1241 tag,1242 self.tags,1243 meta,1244 self.metadata,1245 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1246 params1247 ),1248 )1249 for callback, tag, meta in zip(1250 callbacks, tags_list, metadata_list, strict=False1251 )1252 ]1253 else:1254 # We've received a single callbacks arg to apply to all inputs1255 params = self._dict_for_compat()1256 params["stop"] = stop1257 callback_managers = [1258 AsyncCallbackManager.configure(1259 cast("Callbacks", callbacks),1260 self.callbacks,1261 self.verbose,1262 cast("list[str]", tags),1263 self.tags,1264 cast("dict[str, Any]", metadata),1265 self.metadata,1266 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1267 params1268 ),1269 )1270 
] * len(prompts)1271 run_name_list = [cast("str | None", run_name)] * len(prompts)1272 run_ids_list = self._get_run_ids_list(run_id, prompts)1273 options = {"stop": stop}1274 (1275 existing_prompts,1276 llm_string,1277 missing_prompt_idxs,1278 missing_prompts,1279 ) = await aget_prompts(params, prompts, self.cache)12801281 # Verify whether the cache is set, and if the cache is set,1282 # verify whether the cache is available.1283 new_arg_supported = inspect.signature(self._agenerate).parameters.get(1284 "run_manager"1285 )1286 if (self.cache is None and get_llm_cache() is None) or self.cache is False:1287 run_managers = await asyncio.gather(1288 *[1289 callback_manager.on_llm_start(1290 self._serialized,1291 [prompt],1292 invocation_params=params,1293 options=options,1294 name=run_name,1295 batch_size=len(prompts),1296 run_id=run_id_,1297 )1298 for callback_manager, prompt, run_name, run_id_ in zip(1299 callback_managers,1300 prompts,1301 run_name_list,1302 run_ids_list,1303 strict=False,1304 )1305 ]1306 )1307 run_managers = [r[0] for r in run_managers] # type: ignore[misc]1308 return await self._agenerate_helper(1309 prompts,1310 stop,1311 run_managers, # type: ignore[arg-type]1312 new_arg_supported=bool(new_arg_supported),1313 **kwargs,1314 )1315 if len(missing_prompts) > 0:1316 run_managers = await asyncio.gather(1317 *[1318 callback_managers[idx].on_llm_start(1319 self._serialized,1320 [prompts[idx]],1321 invocation_params=params,1322 options=options,1323 name=run_name_list[idx],1324 batch_size=len(missing_prompts),1325 )1326 for idx in missing_prompt_idxs1327 ]1328 )1329 run_managers = [r[0] for r in run_managers] # type: ignore[misc]1330 new_results = await self._agenerate_helper(1331 missing_prompts,1332 stop,1333 run_managers, # type: ignore[arg-type]1334 new_arg_supported=bool(new_arg_supported),1335 **kwargs,1336 )1337 llm_output = await aupdate_cache(1338 self.cache,1339 existing_prompts,1340 llm_string,1341 missing_prompt_idxs,1342 new_results,1343 
prompts,1344 )1345 run_info = (1346 [RunInfo(run_id=run_manager.run_id) for run_manager in run_managers] # type: ignore[attr-defined]1347 if run_managers1348 else None1349 )1350 else:1351 llm_output = {}1352 run_info = None1353 generations = [existing_prompts[i] for i in range(len(prompts))]1354 return LLMResult(generations=generations, llm_output=llm_output, run=run_info)13551356 async def _call_async(1357 self,1358 prompt: str,1359 stop: list[str] | None = None,1360 callbacks: Callbacks = None,1361 *,1362 tags: list[str] | None = None,1363 metadata: builtins.dict[str, Any] | None = None,1364 **kwargs: Any,1365 ) -> str:1366 """Check Cache and run the LLM on the given prompt and input."""1367 result = await self.agenerate(1368 [prompt],1369 stop=stop,1370 callbacks=callbacks,1371 tags=tags,1372 metadata=metadata,1373 **kwargs,1374 )1375 return result.generations[0][0].text13761377 def __str__(self) -> str:1378 """Return a string representation of the object for printing."""1379 cls_name = f"\033[1m{self.__class__.__name__}\033[0m"1380 return f"{cls_name}\nParams: {self._identifying_params}"13811382 @property1383 @abstractmethod1384 def _llm_type(self) -> str:1385 """Return type of llm."""13861387 @deprecated("1.4.2", alternative="asdict", removal="2.0.0")1388 @override1389 def dict(self, **_kwargs: Any) -> builtins.dict[str, Any]:1390 """DEPRECATED - use `asdict()` instead.13911392 Return a dictionary representation of the LLM.1393 """1394 return self.asdict()13951396 def asdict(self) -> builtins.dict[str, Any]:1397 """Return a dictionary representation of the LLM."""1398 starter_dict = dict(self._identifying_params)1399 starter_dict["_type"] = self._llm_type1400 return starter_dict14011402 def _dict_for_compat(self) -> builtins.dict[str, Any]:1403 """Return the LLM dictionary while preserving deprecated overrides."""1404 with suppress_langchain_deprecation_warning():1405 return self.dict()14061407 def save(self, file_path: Path | str) -> None:1408 """Save the 
class LLM(BaseLLM):
    """Convenience base class for writing a custom LLM.

    Subclasses are expected to provide:

    - `_call` method: Run the LLM on a single prompt (used by `invoke`).
    - `_identifying_params` property: Return a dictionary of the parameters that
        identify the LLM. This matters for caching and tracing, and should
        mostly include a `model_name`.

    Optional overrides that enable further optimizations:

    - `_acall`: A native async counterpart of `_call`. Without it, the
        synchronous version is run through `run_in_executor`.
        (Used by `ainvoke`).
    - `_stream`: Stream the LLM output for a prompt.
        `stream` uses `_stream` when it is provided; otherwise it uses `_call`
        and the output arrives in one chunk.
    - `_astream`: A native async counterpart of `_stream`.
        `astream` uses `_astream` when it is provided; otherwise it falls back
        to `_stream` if that is implemented, and to `_acall` if it is not.
    """

    @abstractmethod
    def _call(
        self,
        prompt: str,
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> str:
        """Run the LLM on a single prompt.

        Subclasses implement the actual model logic here.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating.

                Model output is cut off at the first occurrence of any of these
                substrings.

                If stop tokens are not supported consider raising `NotImplementedError`.
            run_manager: Callback manager for the run.
            **kwargs: Arbitrary additional keyword arguments.

                These are usually passed to the model provider API call.

        Returns:
            The model output as a string. SHOULD NOT include the prompt.
        """

    async def _acall(
        self,
        prompt: str,
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> str:
        """Run the LLM on a single prompt asynchronously.

        By default this hands the synchronous `_call` to `run_in_executor`.
        Override it with a true async implementation to avoid that overhead.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating.

                Model output is cut off at the first occurrence of any of these
                substrings.

                If stop tokens are not supported consider raising `NotImplementedError`.
            run_manager: Callback manager for the run.
            **kwargs: Arbitrary additional keyword arguments.

                These are usually passed to the model provider API call.

        Returns:
            The model output as a string. SHOULD NOT include the prompt.
        """
        # `_call` takes a synchronous manager, so convert the async one if present.
        sync_manager = run_manager.get_sync() if run_manager else None
        return await run_in_executor(
            None, self._call, prompt, stop, sync_manager, **kwargs
        )

    def _generate(
        self,
        prompts: list[str],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Call `_call` once per prompt, in order, and collect the results.

        `run_manager` is forwarded only when the `_call` signature declares a
        parameter with that name. Each prompt yields a single `Generation`.
        """
        # TODO: add caching here.
        takes_run_manager = "run_manager" in inspect.signature(self._call).parameters

        def _complete(prompt: str) -> str:
            if takes_run_manager:
                return self._call(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                )
            return self._call(prompt, stop=stop, **kwargs)

        return LLMResult(
            generations=[[Generation(text=_complete(prompt))] for prompt in prompts]
        )

    async def _agenerate(
        self,
        prompts: list[str],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Await `_acall` once per prompt, in order, and collect the results.

        `run_manager` is forwarded only when the `_acall` signature declares a
        parameter with that name. Prompts are processed one after another, not
        concurrently. Each prompt yields a single `Generation`.
        """
        takes_run_manager = "run_manager" in inspect.signature(self._acall).parameters

        async def _complete(prompt: str) -> str:
            if takes_run_manager:
                return await self._acall(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                )
            return await self._acall(prompt, stop=stop, **kwargs)

        return LLMResult(
            generations=[
                [Generation(text=await _complete(prompt))] for prompt in prompts
            ]
        )
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.