Overuse of `isinstance` checks may indicate design issues; consider polymorphism.
if isinstance(document.metadata[field], float):
import datetime
from copy import deepcopy
from typing import Any

from langchain_core.callbacks import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langchain_core.vectorstores import VectorStore
from pydantic import ConfigDict, Field
from typing_extensions import override


def _get_hours_passed(time: datetime.datetime, ref_time: datetime.datetime) -> float:
    """Get the hours passed between two datetimes.

    Args:
        time: The later point in time.
        ref_time: The reference (earlier) point in time.

    Returns:
        `time - ref_time` expressed in hours; negative if `time` precedes
        `ref_time`.
    """
    return (time - ref_time).total_seconds() / 3600


class TimeWeightedVectorStoreRetriever(BaseRetriever):
    """Time Weighted Vector Store Retriever.

    Retriever that combines embedding similarity with recency in retrieving values.
    """

    vectorstore: VectorStore
    """The `VectorStore` to store documents and determine salience."""

    search_kwargs: dict = Field(default_factory=lambda: {"k": 100})
    """Keyword arguments to pass to the `VectorStore` similarity search."""

    # TODO: abstract as a queue
    memory_stream: list[Document] = Field(default_factory=list)
    """The memory_stream of documents to search through."""

    decay_rate: float = Field(default=0.01)
    """The exponential decay factor used as `(1.0-decay_rate)**(hrs_passed)`."""

    k: int = 4
    """The maximum number of documents to retrieve in a given call."""

    other_score_keys: list[str] = []
    """Other keys in the metadata to factor into the score, e.g. 'importance'."""

    default_salience: float | None = None
    """The salience to assign memories not retrieved from the vector store.

    None assigns no salience to documents not fetched from the vector store.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
    )

    def _document_get_date(self, field: str, document: Document) -> datetime.datetime:
        """Return the value of the date field of a document.

        Args:
            field: The metadata key holding the date.
            document: The document whose metadata is inspected.

        Returns:
            The current time if `field` is missing from the metadata; the
            timestamp converted with `datetime.fromtimestamp` if the stored
            value is numeric; otherwise the stored value unchanged.
        """
        if field not in document.metadata:
            return datetime.datetime.now()
        value = document.metadata[field]
        # Accept `int` as well as `float` epoch seconds: an `int` returned as-is
        # would break the datetime subtraction in `_get_hours_passed`. `bool` is
        # excluded because it is a subclass of `int`.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return datetime.datetime.fromtimestamp(value)
        return value

    def _get_combined_score(
        self,
        document: Document,
        vector_relevance: float | None,
        current_time: datetime.datetime,
    ) -> float:
        """Return the combined score for a document.

        The score is the sum of the recency term
        `(1.0 - decay_rate) ** hours_since_last_access`, every metadata value
        named in `other_score_keys` that the document carries, and
        `vector_relevance` when it is not `None`.
        """
        hours_passed = _get_hours_passed(
            current_time,
            self._document_get_date("last_accessed_at", document),
        )
        score = (1.0 - self.decay_rate) ** hours_passed
        for key in self.other_score_keys:
            if key in document.metadata:
                score += document.metadata[key]
        if vector_relevance is not None:
            score += vector_relevance
        return score

    def _collect_salient(
        self,
        docs_and_scores: list[tuple[Document, float]],
    ) -> dict[int, tuple[Document, float]]:
        """Map vector store hits onto their `memory_stream` entries.

        Hits without a `buffer_idx` in their metadata are skipped. The
        document placed in the result is the one held in `memory_stream`, not
        the copy returned by the vector store.
        """
        results = {}
        for fetched_doc, relevance in docs_and_scores:
            if "buffer_idx" in fetched_doc.metadata:
                buffer_idx = fetched_doc.metadata["buffer_idx"]
                doc = self.memory_stream[buffer_idx]
                results[buffer_idx] = (doc, relevance)
        return results

    def get_salient_docs(self, query: str) -> dict[int, tuple[Document, float]]:
        """Return documents that are salient to the query.

        Args:
            query: The text passed to the vector store relevance search.

        Returns:
            A mapping from `buffer_idx` to `(document, relevance)`.
        """
        docs_and_scores: list[tuple[Document, float]]
        docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
            query,
            **self.search_kwargs,
        )
        return self._collect_salient(docs_and_scores)

    async def aget_salient_docs(self, query: str) -> dict[int, tuple[Document, float]]:
        """Return documents that are salient to the query.

        Async counterpart of `get_salient_docs`.

        Args:
            query: The text passed to the vector store relevance search.

        Returns:
            A mapping from `buffer_idx` to `(document, relevance)`.
        """
        docs_and_scores: list[tuple[Document, float]]
        docs_and_scores = (
            await self.vectorstore.asimilarity_search_with_relevance_scores(
                query,
                **self.search_kwargs,
            )
        )
        return self._collect_salient(docs_and_scores)

    def _get_rescored_docs(
        self,
        docs_and_scores: dict[Any, tuple[Document, float | None]],
    ) -> list[Document]:
        """Rank candidates by combined score and return the top `k`.

        Each returned document is the `memory_stream` entry, and its
        `last_accessed_at` metadata is set to the current time.
        """
        current_time = datetime.datetime.now()
        rescored_docs = [
            (doc, self._get_combined_score(doc, relevance, current_time))
            for doc, relevance in docs_and_scores.values()
        ]
        rescored_docs.sort(key=lambda x: x[1], reverse=True)
        result = []
        # Ensure frequently accessed memories aren't forgotten
        for doc, _ in rescored_docs[: self.k]:
            # TODO: Update vector store doc once `update` method is exposed.
            buffered_doc = self.memory_stream[doc.metadata["buffer_idx"]]
            buffered_doc.metadata["last_accessed_at"] = current_time
            result.append(buffered_doc)
        return result

    def _recent_docs_with_default_salience(
        self,
    ) -> dict[Any, tuple[Document, float | None]]:
        """Return the last `k` buffered documents paired with `default_salience`."""
        return {
            doc.metadata["buffer_idx"]: (doc, self.default_salience)
            for doc in self.memory_stream[-self.k :]
        }

    @override
    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
    ) -> list[Document]:
        docs_and_scores = self._recent_docs_with_default_salience()
        # If a doc is considered salient, update the salience score
        docs_and_scores.update(self.get_salient_docs(query))
        return self._get_rescored_docs(docs_and_scores)

    @override
    async def _aget_relevant_documents(
        self,
        query: str,
        *,
        run_manager: AsyncCallbackManagerForRetrieverRun,
    ) -> list[Document]:
        docs_and_scores = self._recent_docs_with_default_salience()
        # If a doc is considered salient, update the salience score
        docs_and_scores.update(await self.aget_salient_docs(query))
        return self._get_rescored_docs(docs_and_scores)

    def _stamp_and_buffer(
        self,
        documents: list[Document],
        current_time: datetime.datetime | None,
    ) -> list[Document]:
        """Copy `documents`, stamp their metadata and append them to the buffer.

        Args:
            documents: The caller's documents; they are deep-copied, not mutated.
            current_time: Timestamp for missing `last_accessed_at` /
                `created_at` metadata. `None` means "now".

        Returns:
            The stamped copies, each carrying its `buffer_idx` position in
            `memory_stream`.
        """
        if current_time is None:
            current_time = datetime.datetime.now()
        # Avoid mutating input documents
        dup_docs = [deepcopy(d) for d in documents]
        for buffer_idx, doc in enumerate(dup_docs, start=len(self.memory_stream)):
            doc.metadata.setdefault("last_accessed_at", current_time)
            doc.metadata.setdefault("created_at", current_time)
            doc.metadata["buffer_idx"] = buffer_idx
        self.memory_stream.extend(dup_docs)
        return dup_docs

    def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
        """Add documents to vectorstore.

        Args:
            documents: The documents to add; copies are stored, the inputs are
                left untouched.
            **kwargs: Forwarded unchanged to `vectorstore.add_documents`. An
                optional `current_time` entry is also used as the timestamp
                for missing `last_accessed_at` / `created_at` metadata.

        Returns:
            Whatever `vectorstore.add_documents` returns.
        """
        dup_docs = self._stamp_and_buffer(documents, kwargs.get("current_time"))
        return self.vectorstore.add_documents(dup_docs, **kwargs)

    async def aadd_documents(
        self,
        documents: list[Document],
        **kwargs: Any,
    ) -> list[str]:
        """Add documents to vectorstore.

        Async counterpart of `add_documents`.

        Args:
            documents: The documents to add; copies are stored, the inputs are
                left untouched.
            **kwargs: Forwarded unchanged to `vectorstore.aadd_documents`. An
                optional `current_time` entry is also used as the timestamp
                for missing `last_accessed_at` / `created_at` metadata.

        Returns:
            Whatever `vectorstore.aadd_documents` returns.
        """
        dup_docs = self._stamp_and_buffer(documents, kwargs.get("current_time"))
        return await self.vectorstore.aadd_documents(dup_docs, **kwargs)
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.