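Avoid unnecessary list conversions; use generators where possible. `load()` deliberately materializes every document into a list, so callers that only need to iterate once should call `lazy_load()` instead of: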
return list(self.lazy_load())
"""Abstract interface for document loader implementations."""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

from langchain_core.runnables import run_in_executor

if TYPE_CHECKING:
    from collections.abc import AsyncIterator, Iterator

    from langchain_text_splitters import TextSplitter

    from langchain_core.documents import Document
    from langchain_core.documents.base import Blob

# The text-splitter package is optional. Record whether it imported so that
# `load_and_split` can raise a descriptive ImportError when it is missing.
try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    _HAS_TEXT_SPLITTERS = True
except ImportError:
    _HAS_TEXT_SPLITTERS = False


class BaseLoader(ABC):  # noqa: B024
    """Base interface for document loaders.

    Concrete loaders should provide the lazy-loading method as a generator so
    that documents are not all held in memory at the same time.

    `load` is only a convenience wrapper and should not be overridden.
    """

    # Subclasses are expected to implement `lazy_load`, not this method.
    def load(self) -> list[Document]:
        """Collect everything produced by `lazy_load` into a list.

        Returns:
            The documents.
        """
        return [*self.lazy_load()]

    async def aload(self) -> list[Document]:
        """Collect everything produced by `alazy_load` into a list.

        Returns:
            The documents.
        """
        collected: list[Document] = []
        async for document in self.alazy_load():
            collected.append(document)
        return collected

    def load_and_split(
        self, text_splitter: TextSplitter | None = None
    ) -> list[Document]:
        """Load the documents and split them into chunks.

        The chunks are themselves returned as `Document` objects.

        !!! danger

            Do not override this method. It should be considered to be deprecated!

        Args:
            text_splitter: `TextSplitter` instance to use for splitting documents.

                Defaults to `RecursiveCharacterTextSplitter`.

        Raises:
            ImportError: If `langchain-text-splitters` is not installed and no
                `text_splitter` is provided.

        Returns:
            List of `Document` objects.
        """
        if text_splitter is not None:
            splitter: TextSplitter = text_splitter
        elif _HAS_TEXT_SPLITTERS:
            splitter = RecursiveCharacterTextSplitter()
        else:
            msg = (
                "Unable to import from langchain_text_splitters. Please specify "
                "text_splitter or install langchain_text_splitters with "
                "`pip install -U langchain-text-splitters`."
            )
            raise ImportError(msg)

        # The splitter is resolved first so a missing dependency is reported
        # before any documents are loaded.
        loaded = self.load()
        return splitter.split_documents(loaded)

    # Attention: this method is intended to become an abstractmethod once all
    # existing subclasses implement it.
    def lazy_load(self) -> Iterator[Document]:
        """A lazy loader for `Document`.

        Falls back to iterating over `load()` when a subclass overrides `load`
        but not this method.

        Raises:
            NotImplementedError: If the subclass overrides neither `lazy_load`
                nor `load`.

        Yields:
            The `Document` objects.
        """
        overrides_load = type(self).load != BaseLoader.load
        if not overrides_load:
            msg = f"{self.__class__.__name__} does not implement lazy_load()"
            raise NotImplementedError(msg)
        return iter(self.load())

    async def alazy_load(self) -> AsyncIterator[Document]:
        """An asynchronous lazy loader for `Document`.

        Drives the synchronous `lazy_load` iterator through `run_in_executor`,
        one item per call.

        Yields:
            The `Document` objects.
        """
        sync_iterator = await run_in_executor(None, self.lazy_load)
        # Unique sentinel returned by `next` once the iterator is exhausted.
        exhausted = object()
        while (
            item := await run_in_executor(None, next, sync_iterator, exhausted)
        ) is not exhausted:
            yield item  # type: ignore[misc]


class BaseBlobParser(ABC):
    """Abstract interface for blob parsers.

    A blob parser turns the raw data stored in a blob into one or more
    `Document` objects.

    Parsers can be composed with blob loaders, which makes a parser reusable
    regardless of how the blob was originally loaded.
    """

    @abstractmethod
    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazy parsing interface.

        Subclasses are required to implement this method.

        Args:
            blob: `Blob` instance

        Returns:
            Generator of `Document` objects
        """

    def parse(self, blob: Blob) -> list[Document]:
        """Eagerly parse the blob into a list of `Document` objects.

        This is a convenience method for interactive development environments.

        Production applications should favor the `lazy_parse` method instead.

        Subclasses should generally not override this method.

        Args:
            blob: `Blob` instance

        Returns:
            List of `Document` objects
        """
        return [*self.lazy_parse(blob)]
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.