Ensure functions have docstrings for documentation
def all_package_dirs() -> Set[str]:
"""Analyze git diffs to determine which directories need to be tested.

Intelligently determines which LangChain packages and directories need to be tested,
linted, or built based on the changes. Handles dependency relationships between
packages, maps file changes to appropriate CI job configurations, and outputs JSON
configurations for GitHub Actions.

- Maps changed files to affected package directories (libs/core, libs/partners/*, etc.)
- Builds dependency graph to include dependent packages when core components change
- Generates test matrix configurations with appropriate Python versions
- Handles special cases for Pydantic version testing and performance benchmarks

Used as part of the check_diffs workflow.
"""

import glob
import json
import os
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Set

import tomllib
from get_min_versions import get_min_version_from_toml
from packaging.requirements import Requirement

LANGCHAIN_DIRS = [
    "libs/core",
    "libs/text-splitters",
    "libs/langchain",
    "libs/langchain_v1",
    "libs/model-profiles",
]

# Packages with VCR cassette-backed integration tests.
# These get a playback-only CI check to catch stale cassettes.
VCR_PACKAGES = {
    "libs/partners/openai",
}

# When set to True, we are ignoring core dependents
# in order to be able to get CI to pass for each individual
# package that depends on core
# e.g. if you touch core, we don't then add textsplitters/etc to CI
IGNORE_CORE_DEPENDENTS = False

# Ignored partners are removed from dependents but still run if directly edited
IGNORED_PARTNERS = [
    # remove huggingface from dependents because of CI instability
    # specifically in huggingface jobs
    "huggingface",
]

# Changes under any of these paths are treated as CI infrastructure changes.
_INFRA_PREFIXES = (
    ".github/workflows",
    ".github/tools",
    ".github/actions",
    ".github/scripts/check_diff.py",
)

# Jobs for which a configuration line is printed, in output order.
_JOBS = [
    "lint",
    "test",
    "extended-tests",
    "compile-integration-tests",
    "dependencies",
    "test-pydantic",
    "codspeed",
    "vcr-tests",
]


def _is_within(file: str, dir_: str) -> bool:
    """Return whether `file` is `dir_` itself or a path located under it.

    Matching is done on a path-component boundary, so `libs/langchain_v1/x.py`
    is *not* considered to be within `libs/langchain`.
    """
    return file == dir_ or file.startswith(dir_ + "/")


def all_package_dirs() -> Set[str]:
    """Return the directory of every package found under `./libs`.

    A package is any directory (searched recursively) that contains a
    `pyproject.toml`. Paths containing `libs/standard-tests` are excluded.

    Returns:
        Package directories relative to the current working directory,
            e.g. `libs/core` or `libs/partners/openai`.
    """
    return {
        # `Path` drops the leading "./" that glob echoes back from the pattern.
        Path(path).parent.as_posix()
        for path in glob.glob("./libs/**/pyproject.toml", recursive=True)
        if "libs/standard-tests" not in path
    }


def dependents_graph() -> dict:
    """Construct a mapping of package -> dependents

    Done such that we can run tests on all dependents of a package when a change is made.

    Dependencies are read from `[project].dependencies` and the `test` dependency
    group of every `pyproject.toml` under `./libs` (paths containing `template` are
    skipped), plus an optional `extended_testing_deps.txt` next to it. Only
    requirements whose text contains `langchain` are recorded.

    Returns:
        Mapping of package name to the set of package directories that depend on
            it, with every partner in `IGNORED_PARTNERS` removed from the values.

    Raises:
        ValueError: If `extended_testing_deps.txt` contains an editable install
            that does not point at `../partners/`.
    """
    dependents = defaultdict(set)

    for path in glob.glob("./libs/**/pyproject.toml", recursive=True):
        if "template" in path:
            continue

        # load regular and test deps from pyproject.toml
        with open(path, "rb") as f:
            pyproject = tomllib.load(f)

        package_path = Path(path).parent
        pkg_dir = package_path.as_posix()

        # Tolerate packages that declare no dependencies or no `test` group.
        declared_deps = [
            *pyproject.get("project", {}).get("dependencies", []),
            *pyproject.get("dependency-groups", {}).get("test", []),
        ]
        for dep in declared_deps:
            # Dependency groups may contain `{include-group = ...}` tables, which
            # are not requirement strings and cannot be parsed as such.
            if not isinstance(dep, str):
                continue
            if "langchain" in dep:
                dependents[Requirement(dep).name].add(pkg_dir)

        # load extended deps from extended_testing_deps.txt
        extended_requirement_path = package_path / "extended_testing_deps.txt"
        if not extended_requirement_path.exists():
            continue

        with open(extended_requirement_path, "r") as f:
            extended_deps = f.read().splitlines()

        for depline in extended_deps:
            if depline.startswith("-e "):
                # editable dependency
                if not depline.startswith("-e ../partners/"):
                    raise ValueError(
                        "Extended test deps should only editable install partner packages"
                    )
                partner = depline.split("partners/")[1]
                dep = f"langchain-{partner}"
            else:
                dep = depline.split("==")[0]

            if "langchain" in dep:
                dependents[dep].add(pkg_dir)

    for pkg_dependents in dependents.values():
        for partner in IGNORED_PARTNERS:
            pkg_dependents.discard(f"libs/partners/{partner}")
    return dependents


def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]:
    """Extend a set of package directories with the packages that depend on them.

    The package name looked up in `dependents` is `langchain-` followed by the
    last path component of the directory.

    Args:
        dirs_to_eval: Package directories that were directly affected.
        dependents: Mapping of package name to dependent directories, as built by
            `dependents_graph`. It is not modified.

    Returns:
        Sorted list containing every directory in `dirs_to_eval` plus their
            dependents.
    """
    updated = set()
    for dir_ in dirs_to_eval:
        updated.add(dir_)
        # handle core manually because it has so many dependents
        if "core" in dir_:
            continue
        pkg = "langchain-" + dir_.split("/")[-1]
        # `.get` keeps this working for plain dicts and avoids inserting empty
        # entries into a defaultdict.
        updated.update(dependents.get(pkg, ()))
    # Sorted so that the emitted job matrix is stable between runs.
    return sorted(updated)


def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
    """Build the matrix entries for one job and one package directory.

    Args:
        job: Name of the CI job.
        dir_: Package directory the job runs in.

    Returns:
        One configuration per matrix entry. `test-pydantic` yields one entry per
            pydantic version, `codspeed` yields a single entry with a
            `codspeed-mode`, and every other job yields one entry per Python
            version.
    """
    if job == "test-pydantic":
        return _get_pydantic_test_configs(dir_)

    if job == "codspeed":
        # CPU simulation (<1% variance, Valgrind-based) is the default.
        # Partners with heavy SDK inits use walltime instead to keep CI fast.
        CODSPEED_WALLTIME_DIRS = {
            "libs/core",
            "libs/partners/fireworks",  # ~328s under simulation
            "libs/partners/openai",  # 6 benchmarks, ~6 min under simulation
        }
        mode = "walltime" if dir_ in CODSPEED_WALLTIME_DIRS else "simulation"
        return [
            {
                "working-directory": dir_,
                "python-version": "3.13",
                "codspeed-mode": mode,
            }
        ]

    if dir_ == "libs/core":
        py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
    else:
        py_versions = ["3.10", "3.14"]

    return [{"working-directory": dir_, "python-version": py_v} for py_v in py_versions]


def _locked_pydantic_minor(lock_path: str) -> int:
    """Return the minor version of pydantic recorded in a `uv.lock` file.

    Raises:
        ValueError: If the lock file has no `pydantic` package entry.
    """
    with open(lock_path, "rb") as f:
        lock_data = tomllib.load(f)

    for package in lock_data["package"]:
        if package["name"] == "pydantic":
            return int(package["version"].split(".")[1])

    msg = f"pydantic not found in {lock_path}"
    raise ValueError(msg)


def _minor_component(version: str) -> int:
    """Return the minor component of a dotted version string, or 0 if it has none."""
    return int(version.split(".")[1]) if "." in version else 0


def _get_pydantic_test_configs(
    dir_: str, *, python_version: str = "3.12"
) -> List[Dict[str, str]]:
    """Build one test configuration per pydantic 2.x minor supported by `dir_`.

    The tested range is the intersection of what `libs/core` and `dir_` support:
    the upper bound is the smaller of the two locked pydantic minors and the lower
    bound is the larger of the two minimum pydantic minors.

    Args:
        dir_: Package directory to generate configurations for.
        python_version: Python version used for every generated configuration and
            for resolving the minimum pydantic version.

    Returns:
        Configurations with `working-directory`, `pydantic-version` and
            `python-version` keys. Empty if the lower bound exceeds the upper one.

    Raises:
        ValueError: If pydantic is missing from either `uv.lock` file.
    """
    core_max_pydantic_minor = _locked_pydantic_minor("./libs/core/uv.lock")
    dir_max_pydantic_minor = _locked_pydantic_minor(f"./{dir_}/uv.lock")

    core_min_pydantic_version = get_min_version_from_toml(
        "./libs/core/pyproject.toml", "release", python_version, include=["pydantic"]
    )["pydantic"]
    # A package may not declare pydantic itself; treat that as "no lower bound".
    dir_min_pydantic_version = get_min_version_from_toml(
        f"./{dir_}/pyproject.toml", "release", python_version, include=["pydantic"]
    ).get("pydantic", "0.0.0")

    max_pydantic_minor = min(dir_max_pydantic_minor, core_max_pydantic_minor)
    min_pydantic_minor = max(
        _minor_component(dir_min_pydantic_version),
        _minor_component(core_min_pydantic_version),
    )

    return [
        {
            "working-directory": dir_,
            "pydantic-version": f"2.{v}.0",
            "python-version": python_version,
        }
        for v in range(min_pydantic_minor, max_pydantic_minor + 1)
    ]


def _get_configs_for_multi_dirs(
    job: str, dirs_to_run: Dict[str, Set[str]], dependents: dict
) -> List[Dict[str, str]]:
    """Build the full matrix for one job across all affected directories.

    Args:
        job: Name of the CI job.
        dirs_to_run: Directly affected directories, keyed by `lint`, `test`,
            `extended-test` and `codspeed`.
        dependents: Mapping of package name to dependent directories.

    Returns:
        Concatenated configurations of every directory selected for `job`.

    Raises:
        ValueError: If `job` is not a known job name.
    """
    if job == "lint":
        dirs = add_dependents(
            dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"],
            dependents,
        )
    elif job in ["test", "compile-integration-tests", "dependencies", "test-pydantic"]:
        dirs = add_dependents(
            dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
        )
    elif job == "extended-tests":
        dirs = sorted(dirs_to_run["extended-test"])
    elif job == "codspeed":
        dirs = sorted(dirs_to_run["codspeed"])
    elif job == "vcr-tests":
        # Only run VCR tests for packages that have cassettes and are affected
        all_affected = set(
            add_dependents(
                dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
            )
        )
        dirs = sorted(d for d in VCR_PACKAGES if d in all_affected)
    else:
        raise ValueError(f"Unknown job: {job}")

    return [
        config for dir_ in dirs for config in _get_configs_for_single_dir(job, dir_)
    ]


def _get_changed_files(args: list[str]) -> list[str]:
    """Parse changed files from command-line arguments.

    Args:
        args: Either a legacy list of filename arguments or a single JSON array
            produced by `Ana06/get-changed-files` with `format: json`.

    Returns:
        List of changed files.

    Raises:
        ValueError: If a single argument looks like JSON but is not a string array.
    """
    if len(args) != 1:
        return args

    value = args[0].strip()
    if not value.startswith("[") or not value.endswith("]"):
        return args

    try:
        parsed = json.loads(value)
    except json.JSONDecodeError as e:
        msg = "Expected changed files JSON to be a list of strings."
        raise ValueError(msg) from e

    if not isinstance(parsed, list) or not all(
        isinstance(file, str) for file in parsed
    ):
        msg = "Expected changed files JSON to be a list of strings."
        raise ValueError(msg)
    return parsed


def _collect_dirs_to_run(files: List[str]) -> Dict[str, Set[str]]:
    """Map changed files to the package directories each job category must cover.

    Args:
        files: Paths of the changed files, relative to the repository root.

    Returns:
        Directories keyed by `lint`, `test`, `extended-test` and `codspeed`.

    Raises:
        ValueError: If a file lives in a directory under `libs/` that this script
            does not know about.
    """
    dirs_to_run: Dict[str, Set[str]] = {
        "lint": set(),
        "test": set(),
        "extended-test": set(),
        "codspeed": set(),
    }

    if len(files) >= 300:
        # max diff length is 300 files - there are likely files missing
        dirs_to_run["lint"] = all_package_dirs()
        dirs_to_run["test"] = all_package_dirs()
        dirs_to_run["extended-test"] = set(LANGCHAIN_DIRS)

    for file in files:
        if file.startswith(_INFRA_PREFIXES):
            # Infrastructure changes (workflows, actions, CI scripts) trigger tests on
            # all core packages as a safety measure. This ensures that changes to CI/CD
            # infrastructure don't inadvertently break package testing, even if the change
            # appears unrelated (e.g., documentation build workflows). This is intentionally
            # conservative to catch unexpected side effects from workflow modifications.
            #
            # Example: A PR modifying .github/workflows/api_doc_build.yml will trigger
            # lint/test jobs for libs/core, libs/text-splitters, libs/langchain, and
            # libs/langchain_v1, even though the workflow may only affect documentation.
            dirs_to_run["extended-test"].update(LANGCHAIN_DIRS)

        if _is_within(file, "libs/core"):
            dirs_to_run["codspeed"].add("libs/core")
        if _is_within(file, "libs/langchain_v1"):
            dirs_to_run["codspeed"].add("libs/langchain_v1")

        if any(_is_within(file, dir_) for dir_ in LANGCHAIN_DIRS):
            # add that dir and all dirs after in LANGCHAIN_DIRS
            # for extended testing
            found = False
            for dir_ in LANGCHAIN_DIRS:
                if dir_ == "libs/core" and IGNORE_CORE_DEPENDENTS:
                    dirs_to_run["extended-test"].add(dir_)
                    continue
                if _is_within(file, dir_):
                    found = True
                if found:
                    dirs_to_run["extended-test"].add(dir_)
        elif _is_within(file, "libs/standard-tests"):
            # TODO: update to include all packages that rely on standard-tests (all partner packages)
            # Note: won't run on external repo partners
            dirs_to_run["lint"].add("libs/standard-tests")
            dirs_to_run["test"].add("libs/standard-tests")
            dirs_to_run["test"].add("libs/partners/mistralai")
            dirs_to_run["test"].add("libs/partners/openai")
            dirs_to_run["test"].add("libs/partners/anthropic")
            dirs_to_run["test"].add("libs/partners/fireworks")
            dirs_to_run["test"].add("libs/partners/groq")
        elif file.startswith("libs/partners/"):
            partner_dir = file.split("/")[2]
            partner_path = f"libs/partners/{partner_dir}"
            if os.path.isdir(partner_path) and [
                filename
                for filename in os.listdir(partner_path)
                if not filename.startswith(".")
            ] != ["README.md"]:
                dirs_to_run["test"].add(partner_path)
                # Only add to codspeed if the partner has benchmarks and is not ignored
                if partner_dir not in IGNORED_PARTNERS and os.path.isdir(
                    f"{partner_path}/tests/benchmarks"
                ):
                    dirs_to_run["codspeed"].add(partner_path)
            # Skip if the directory was deleted or is just a tombstone readme
        elif file.startswith("libs/"):
            # Check if this is a root-level file in libs/ (e.g., libs/README.md)
            file_parts = file.split("/")
            if len(file_parts) == 2:
                # Root-level file in libs/, skip it (no tests needed)
                continue
            raise ValueError(
                f"Unknown lib: {file}. check_diff.py likely needs "
                "an update for this new library!"
            )
        # Any other file (including the root `pyproject.toml` / `uv.lock`)
        # does not select any package directory.

    return dirs_to_run


def main(argv: List[str]) -> None:
    """Print one `job=<json configs>` line per CI job for the changed files.

    Args:
        argv: Command-line arguments without the program name, in either form
            accepted by `_get_changed_files`.
    """
    files = _get_changed_files(argv)
    dirs_to_run = _collect_dirs_to_run(files)
    dependents = dependents_graph()

    map_job_to_configs = {
        job: _get_configs_for_multi_dirs(job, dirs_to_run, dependents)
        for job in _JOBS
    }

    for key, value in map_job_to_configs.items():
        json_output = json.dumps(value)
        print(f"{key}={json_output}")


if __name__ == "__main__":
    main(sys.argv[1:])
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.