scripts/translate.py PYTHON 486 lines View on github.com → Search inside
"""Typer CLI for translating the English docs into other languages with an LLM."""

import json
import secrets
import subprocess
from collections.abc import Iterable
from functools import lru_cache
from os import sep as pathsep
from pathlib import Path
from typing import Annotated

import git
import typer
import yaml
from doc_parsing_utils import check_translation
from github import Github
from pydantic_ai import Agent
from rich import print

# Path prefixes (relative to docs/en/docs) that are never translated.
# `pathsep` here is `os.sep` (the directory separator), not `os.pathsep`.
non_translated_sections = (
    f"reference{pathsep}",
    "release-notes.md",
    "fastapi-people.md",
    "external-links.md",
    "newsletter.md",
    "management-tasks.md",
    "management.md",
    "contributing.md",
    "translations.md",
)

# The general prompt lives next to this script and is read once at import time.
general_prompt_path = Path(__file__).absolute().parent / "general-llm-prompt.md"
general_prompt = general_prompt_path.read_text(encoding="utf-8")

app = typer.Typer()


@lru_cache
def get_langs() -> dict[str, str]:
    """Load `docs/language_names.yml` (cached after the first call)."""
    return yaml.safe_load(Path("docs/language_names.yml").read_text(encoding="utf-8"))


def generate_lang_path(*, lang: str, path: Path) -> Path:
    """Map a path inside `docs/en/docs` to the same path inside `docs/{lang}/docs`.

    Raises `AssertionError` when `path` does not start with `docs/en/docs`.
    """
    en_docs_path = Path("docs/en/docs")
    assert str(path).startswith(str(en_docs_path)), (
        f"Path must be inside {en_docs_path}"
    )
    lang_docs_path = Path(f"docs/{lang}/docs")
    out_path = Path(str(path).replace(str(en_docs_path), str(lang_docs_path)))
    return out_path


def generate_en_path(*, lang: str, path: Path) -> Path:
    """Map a path inside `docs/{lang}/docs` back to the path inside `docs/en/docs`.

    Raises `AssertionError` when `path` already starts with `docs/en/docs`.
    A path that does not contain `docs/{lang}/docs` is returned unchanged.
    """
    en_docs_path = Path("docs/en/docs")
    assert not str(path).startswith(str(en_docs_path)), (
        f"Path must not be inside {en_docs_path}"
    )
    lang_docs_path = Path(f"docs/{lang}/docs")
    out_path = Path(str(path).replace(str(lang_docs_path), str(en_docs_path)))
    return out_path


def get_prompt(
    lang_prompt_content: str,
    old_translation: str | None,
    language: str,
    language_name: str,
    original_content: str,
    additional_instructions: str,
) -> str:
    """Build the full prompt sent to the model.

    The prompt is the general prompt (with its additional-instructions
    placeholder filled in), the language-specific prompt, optional update
    instructions plus the previous translation, and finally the original
    English content. Segments are joined with blank lines.
    """
    general_prompt_with_additional_instructions = general_prompt.replace(
        "[placeholder_for_additional_instructions]", additional_instructions
    )
    prompt_segments = [
        general_prompt_with_additional_instructions,
        lang_prompt_content,
    ]
    # Update instructions are only added when there is a non-empty old translation.
    if old_translation:
        prompt_segments.extend(
            [
                "There is an existing previous translation for the original English content, that may be outdated.",
                "Update the translation only where necessary:",
                "- If the original English content has added parts, also add these parts to the translation.",
                "- If the original English content has removed parts, also remove them from the translation, unless you were instructed earlier to not do that in specific cases.",
                "- If parts of the original English content have changed, also change those parts in the translation.",
                "- If the previous translation violates current instructions, update it.",
                "- Otherwise, preserve the original translation LINE-BY-LINE, AS-IS.",
                "Do not:",
                "- rephrase or rewrite correct lines just to improve the style.",
                "- add or remove line breaks, unless the original English content changed.",
                "- change formatting or whitespace unless absolutely required.",
                "Only change what must be changed. The goal is to minimize diffs for easier human review.",
                "UNLESS you were instructed earlier to behave different, there MUST NOT be whole sentences or partial sentences in the updated translation, which are not in the original English content, and there MUST NOT be whole sentences or partial sentences in the original English content, which are not in the updated translation. Remember: the updated translation shall be IN SYNC with the original English content.",
                "Previous translation:",
                f"%%%\n{old_translation}%%%",
            ]
        )
    prompt_segments.extend(
        [
            f"Translate to {language} ({language_name}).",
            "Original content:",
            f"%%%\n{original_content}%%%",
        ]
    )
    return "\n\n".join(prompt_segments)


@app.command()
def translate_page(
    *,
    language: Annotated[str, typer.Option(envvar="LANGUAGE")],
    en_path: Annotated[Path, typer.Option(envvar="EN_PATH")],
) -> None:
    """Translate one English docs page to `language` and save the result.

    The model output is validated with `check_translation`; on a validation
    error the translation is retried (up to 3 attempts) with the error message
    added to the prompt. The last output is saved even if it never validated.
    """
    assert language != "en", (
        "`en` is the source language, choose another language as translation target"
    )
    langs = get_langs()
    language_name = langs[language]
    lang_path = Path(f"docs/{language}")
    lang_path.mkdir(exist_ok=True)
    lang_prompt_path = lang_path / "llm-prompt.md"
    assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}"
    lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8")

    en_docs_path = Path("docs/en/docs")
    assert str(en_path).startswith(str(en_docs_path)), (
        f"Path must be inside {en_docs_path}"
    )
    out_path = generate_lang_path(lang=language, path=en_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    original_content = en_path.read_text(encoding="utf-8")
    old_translation: str | None = None
    if out_path.exists():
        print(f"Found existing translation: {out_path}")
        old_translation = out_path.read_text(encoding="utf-8")
    print(f"Translating {en_path} to {language} ({language_name})")
    agent = Agent("openai:gpt-5")

    MAX_ATTEMPTS = 3
    additional_instructions = ""
    for attempt_no in range(1, MAX_ATTEMPTS + 1):
        print(f"Running agent for {out_path} (attempt {attempt_no}/{MAX_ATTEMPTS})")
        prompt = get_prompt(
            lang_prompt_content=lang_prompt_content,
            old_translation=old_translation,
            language=language,
            language_name=language_name,
            original_content=original_content,
            additional_instructions=additional_instructions,
        )
        # get_prompt() already fills the placeholder in the general prompt; this
        # second replace also covers the placeholder text appearing in any other
        # segment of the prompt.
        result = agent.run_sync(
            prompt.replace(
                "[placeholder_for_additional_instructions]", additional_instructions
            )
        )
        # Normalize the output to end with exactly one trailing newline.
        out_content = f"{result.output.strip()}\n"
        try:
            check_translation(
                doc_lines=out_content.splitlines(),
                en_doc_lines=original_content.splitlines(),
                lang_code=language,
                auto_fix=False,
                path=str(out_path),
            )
            break  # Exit loop if no errors
        except ValueError as e:
            print(
                f"Translation check failed on attempt {attempt_no}/{MAX_ATTEMPTS}: {e}"
            )
            # Feed the validation error and the failed output into the next attempt.
            additional_instructions = (
                f"Current translation fails validation checks ({e}). "
                "Please, pay special attention to it."
            )
            old_translation = out_content
            continue  # Retry if not reached max attempts
    else:  # Max retry attempts reached
        print(f"Translation failed for {out_path} after {MAX_ATTEMPTS} attempts")

    print(f"Saving translation to {out_path}")
    out_path.write_text(out_content, encoding="utf-8", newline="\n")


def iter_all_en_paths() -> Iterable[Path]:
    """
    Iterate on the markdown files to translate in order of priority.
    """
    first_dirs = [
        Path("docs/en/docs/learn"),
        Path("docs/en/docs/tutorial"),
        Path("docs/en/docs/advanced"),
        Path("docs/en/docs/about"),
        Path("docs/en/docs/how-to"),
    ]
    first_parent = Path("docs/en/docs")
    # 1) Top-level pages, 2) the priority directories, 3) everything else.
    yield from first_parent.glob("*.md")
    for dir_path in first_dirs:
        yield from dir_path.rglob("*.md")
    first_dirs_str = tuple(str(d) for d in first_dirs)
    for path in Path("docs/en/docs").rglob("*.md"):
        # Skip files already yielded by the two passes above.
        if str(path).startswith(first_dirs_str):
            continue
        if path.parent == first_parent:
            continue
        yield path


def iter_en_paths_to_translate() -> Iterable[Path]:
    """Yield English docs paths, skipping those in `non_translated_sections`."""
    en_docs_root = Path("docs/en/docs/")
    for path in iter_all_en_paths():
        relpath = path.relative_to(en_docs_root)
        if not str(relpath).startswith(non_translated_sections):
            yield path


@app.command()
def translate_lang(language: Annotated[str, typer.Option(envvar="LANGUAGE")]) -> None:
    """Translate every page that has no translation yet for `language`."""
    paths_to_process = list(iter_en_paths_to_translate())
    print("Original paths:")
    for p in paths_to_process:
        print(f"  - {p}")
    print(f"Total original paths: {len(paths_to_process)}")
    missing_paths: list[Path] = []
    skipped_paths: list[Path] = []
    for p in paths_to_process:
        lang_path = generate_lang_path(lang=language, path=p)
        if lang_path.exists():
            skipped_paths.append(p)
            continue
        missing_paths.append(p)
    print("Paths to skip:")
    for p in skipped_paths:
        print(f"  - {p}")
    print(f"Total paths to skip: {len(skipped_paths)}")
    print("Paths to process:")
    for p in missing_paths:
        print(f"  - {p}")
    print(f"Total paths to process: {len(missing_paths)}")
    for p in missing_paths:
        print(f"Translating: {p}")
        # Use the requested language (this was previously hard-coded to "es").
        translate_page(language=language, en_path=p)
        print(f"Done translating: {p}")


def get_llm_translatable() -> list[str]:
    """Return the language codes (except `en`) that have a `docs/{lang}/llm-prompt.md`."""
    translatable_langs = []
    langs = get_langs()
    for lang in langs:
        if lang == "en":
            continue
        lang_prompt_path = Path(f"docs/{lang}/llm-prompt.md")
        if lang_prompt_path.exists():
            translatable_langs.append(lang)
    return translatable_langs


@app.command()
def list_llm_translatable() -> list[str]:
    """Print and return the languages that have an LLM prompt file."""
    translatable_langs = get_llm_translatable()
    print("LLM translatable languages:", translatable_langs)
    return translatable_langs


@app.command()
def llm_translatable_json(
    language: Annotated[str | None, typer.Option(envvar="LANGUAGE")] = None,
) -> None:
    """Print the LLM translatable languages as a JSON list.

    When `language` is given, print only that language, or exit with code 1
    if it is not LLM translatable.
    """
    translatable_langs = get_llm_translatable()
    if language:
        if language in translatable_langs:
            print(json.dumps([language]))
            return
        else:
            raise typer.Exit(code=1)
    print(json.dumps(translatable_langs))


@app.command()
def commands_json(
    command: Annotated[str | None, typer.Option(envvar="COMMAND")] = None,
) -> None:
    """Print the commands to run as a JSON list.

    When `command` is given, print only that command, or exit with code 1 if
    it is not an available command. Otherwise print the default commands.
    """
    available_commands = [
        "translate-page",
        "translate-lang",
        "update-outdated",
        "add-missing",
        "update-and-add",
        "remove-removable",
    ]
    default_commands = [
        "remove-removable",
        "update-outdated",
        "add-missing",
    ]
    if command:
        if command in available_commands:
            print(json.dumps([command]))
            return
        else:
            raise typer.Exit(code=1)
    print(json.dumps(default_commands))


@app.command()
def list_removable(language: str) -> list[Path]:
    """Print and return Markdown files under `docs/{language}` with no English counterpart."""
    removable_paths: list[Path] = []
    lang_paths = Path(f"docs/{language}").rglob("*.md")
    for path in lang_paths:
        en_path = generate_en_path(lang=language, path=path)
        if not en_path.exists():
            removable_paths.append(path)
    print(removable_paths)
    return removable_paths


@app.command()
def list_all_removable() -> list[Path]:
    """Print and return the removable files of every language except `en`."""
    all_removable_paths: list[Path] = []
    langs = get_langs()
    for lang in langs:
        if lang == "en":
            continue
        removable_paths = list_removable(lang)
        all_removable_paths.extend(removable_paths)
    print(all_removable_paths)
    return all_removable_paths


@app.command()
def remove_removable(language: Annotated[str, typer.Option(envvar="LANGUAGE")]) -> None:
    """Delete the removable files of `language`."""
    removable_paths = list_removable(language)
    for path in removable_paths:
        path.unlink()
        print(f"Removed: {path}")
    print("Done removing all removable paths")


@app.command()
def remove_all_removable() -> None:
    """Delete the removable files of every language except `en`."""
    all_removable = list_all_removable()
    for removable_path in all_removable:
        removable_path.unlink()
        print(f"Removed: {removable_path}")
    print("Done removing all removable paths")


@app.command()
def list_missing(language: str) -> list[Path]:
    """Print and return the English paths that have no translation for `language`."""
    missing_paths: list[Path] = []
    en_lang_paths = list(iter_en_paths_to_translate())
    for path in en_lang_paths:
        lang_path = generate_lang_path(lang=language, path=path)
        if not lang_path.exists():
            missing_paths.append(path)
    print(missing_paths)
    return missing_paths


@app.command()
def list_outdated(language: str) -> list[Path]:
    """Print and return English paths whose translation was committed before the original.

    Only existing translations are considered. The comparison uses the commit
    datetime of the latest commit touching each file.
    """
    dir_path = Path(__file__).absolute().parent.parent
    repo = git.Repo(dir_path)

    outdated_paths: list[Path] = []
    en_lang_paths = list(iter_en_paths_to_translate())
    for path in en_lang_paths:
        lang_path = generate_lang_path(lang=language, path=path)
        if not lang_path.exists():
            continue
        en_commit = next(repo.iter_commits(paths=path, max_count=1), None)
        lang_commit = next(repo.iter_commits(paths=lang_path, max_count=1), None)
        # A file that exists on disk but has no commit yet has no date to
        # compare; skip it instead of failing with an IndexError.
        if en_commit is None or lang_commit is None:
            continue
        if lang_commit.committed_datetime < en_commit.committed_datetime:
            outdated_paths.append(path)
    print(outdated_paths)
    return outdated_paths


@app.command()
def update_outdated(
    language: Annotated[str, typer.Option(envvar="LANGUAGE")],
    max: Annotated[int, typer.Option(envvar="MAX")] = 10,
) -> None:
    """Re-translate at most `max` outdated pages for `language`."""
    outdated_paths = list_outdated(language)
    for path in outdated_paths[:max]:
        print(f"Updating lang: {language} path: {path}")
        translate_page(language=language, en_path=path)
        print(f"Done updating: {path}")
    print("Done updating all outdated paths")


@app.command()
def add_missing(
    language: Annotated[str, typer.Option(envvar="LANGUAGE")],
    max: Annotated[int, typer.Option(envvar="MAX")] = 10,
) -> None:
    """Translate at most `max` pages that are missing for `language`."""
    missing_paths = list_missing(language)
    for path in missing_paths[:max]:
        print(f"Adding lang: {language} path: {path}")
        translate_page(language=language, en_path=path)
        print(f"Done adding: {path}")
    print("Done adding all missing paths")


@app.command()
def update_and_add(
    language: Annotated[str, typer.Option(envvar="LANGUAGE")],
    max: Annotated[int, typer.Option(envvar="MAX")] = 10,
) -> None:
    """Run `update_outdated` and then `add_missing`, each limited to `max` pages."""
    print(f"Updating outdated translations for {language}")
    update_outdated(language=language, max=max)
    print(f"Adding missing translations for {language}")
    add_missing(language=language, max=max)
    print(f"Done updating and adding for {language}")


@app.command()
def make_pr(
    *,
    language: Annotated[str | None, typer.Option(envvar="LANGUAGE")] = None,
    command: Annotated[str | None, typer.Option(envvar="COMMAND")] = None,
    github_token: Annotated[str, typer.Option(envvar="GITHUB_TOKEN")],
    github_repository: Annotated[str, typer.Option(envvar="GITHUB_REPOSITORY")],
    commit_in_place: Annotated[
        bool, typer.Option(envvar="COMMIT_IN_PLACE", show_default=True)
    ] = False,
) -> None:
    """Commit the changes under `docs`, push them and (unless in place) open a PR.

    Does nothing when the repository has no changes. With `commit_in_place`
    the commit goes to the current branch (never `master`) and no PR is
    created; otherwise a new `translate-...` branch is created and a PR
    against `master` is opened.
    """
    print("Setting up GitHub Actions git user")
    repo = git.Repo(Path(__file__).absolute().parent.parent)
    if not repo.is_dirty(untracked_files=True):
        print("Repository is clean, no changes to commit")
        return
    subprocess.run(["git", "config", "user.name", "github-actions[bot]"], check=True)
    subprocess.run(
        ["git", "config", "user.email", "github-actions[bot]@users.noreply.github.com"],
        check=True,
    )
    current_branch = repo.active_branch.name
    if current_branch == "master" and commit_in_place:
        print("Can't commit directly to master")
        raise typer.Exit(code=1)

    if not commit_in_place:
        branch_name = "translate"
        if language:
            branch_name += f"-{language}"
        if command:
            branch_name += f"-{command}"
        # Random suffix so the new branch name is unlikely to already exist.
        branch_name += f"-{secrets.token_hex(4)}"
        print(f"Creating a new branch {branch_name}")
        subprocess.run(["git", "checkout", "-b", branch_name], check=True)
    else:
        branch_name = current_branch
        print(f"Committing in place on branch {branch_name}")
    print("Adding updated files")
    git_path = Path("docs")
    subprocess.run(["git", "add", str(git_path)], check=True)
    print("Committing updated file")
    message = "🌐 Update translations"
    if language:
        message += f" for {language}"
    if command:
        message += f" ({command})"
    subprocess.run(["git", "commit", "-m", message], check=True)
    print("Pushing branch")
    subprocess.run(["git", "push", "origin", branch_name], check=True)
    if not commit_in_place:
        print("Creating PR")
        g = Github(github_token)
        gh_repo = g.get_repo(github_repository)
        body = message + "\n\nThis PR was created automatically using LLMs."
        # Only reference the prompt file when a language is known; otherwise
        # the link would point to `docs/None/llm-prompt.md`.
        if language:
            body += (
                f"\n\nIt uses the prompt file https://github.com/fastapi/fastapi/blob/master/docs/{language}/llm-prompt.md."
                + "\n\nIn most cases, it's better to make PRs updating that file so that the LLM can do a better job generating the translations than suggesting changes in this PR."
            )
        pr = gh_repo.create_pull(
            title=message, body=body, base="master", head=branch_name
        )
        print(f"Created PR: {pr.number}")
    print("Finished")


if __name__ == "__main__":
    app()

Code quality findings 76

Ensure functions have docstrings for documentation
missing-docstring
def get_langs() -> dict[str, str]:
Ensure functions have docstrings for documentation
missing-docstring
def generate_lang_path(*, lang: str, path: Path) -> Path:
Ensure functions have docstrings for documentation
missing-docstring
def generate_en_path(*, lang: str, path: Path) -> Path:
Ensure functions have docstrings for documentation
missing-docstring
def get_prompt(
Ensure functions have docstrings for documentation
missing-docstring
def translate_page(
Use logging module for better control and configurability
print-statement
print(f"Found existing translation: {out_path}")
Use logging module for better control and configurability
print-statement
print(f"Translating {en_path} to {language} ({language_name})")
Use logging module for better control and configurability
print-statement
print(f"Running agent for {out_path} (attempt {attempt_no}/{MAX_ATTEMPTS})")
Use logging module for better control and configurability
print-statement
print(
Use logging module for better control and configurability
print-statement
print(f"Translation failed for {out_path} after {MAX_ATTEMPTS} attempts")
Use logging module for better control and configurability
print-statement
print(f"Saving translation to {out_path}")
Ensure functions have docstrings for documentation
missing-docstring
def iter_en_paths_to_translate() -> Iterable[Path]:
Ensure functions have docstrings for documentation
missing-docstring
def translate_lang(language: Annotated[str, typer.Option(envvar="LANGUAGE")]) -> None:
Avoid unnecessary list conversions; use generators where possible
unnecessary-list
paths_to_process = list(iter_en_paths_to_translate())
Use logging module for better control and configurability
print-statement
print("Original paths:")
Use logging module for better control and configurability
print-statement
print(f" - {p}")
Use logging module for better control and configurability
print-statement
print(f"Total original paths: {len(paths_to_process)}")
Use logging module for better control and configurability
print-statement
print("Paths to skip:")
Use logging module for better control and configurability
print-statement
print(f" - {p}")
Use logging module for better control and configurability
print-statement
print(f"Total paths to skip: {len(skipped_paths)}")
Use logging module for better control and configurability
print-statement
print("Paths to process:")
Use logging module for better control and configurability
print-statement
print(f" - {p}")
Use logging module for better control and configurability
print-statement
print(f"Total paths to process: {len(missing_paths)}")
Use logging module for better control and configurability
print-statement
print(f"Translating: {p}")
Use logging module for better control and configurability
print-statement
print(f"Done translating: {p}")
Ensure functions have docstrings for documentation
missing-docstring
def get_llm_translatable() -> list[str]:
Ensure functions have docstrings for documentation
missing-docstring
def list_llm_translatable() -> list[str]:
Use logging module for better control and configurability
print-statement
print("LLM translatable languages:", translatable_langs)
Ensure functions have docstrings for documentation
missing-docstring
def llm_translatable_json(
Use logging module for better control and configurability
print-statement
print(json.dumps([language]))
Use logging module for better control and configurability
print-statement
print(json.dumps(translatable_langs))
Ensure functions have docstrings for documentation
missing-docstring
def commands_json(
Use logging module for better control and configurability
print-statement
print(json.dumps([command]))
Use logging module for better control and configurability
print-statement
print(json.dumps(default_commands))
Ensure functions have docstrings for documentation
missing-docstring
def list_removable(language: str) -> list[Path]:
Use logging module for better control and configurability
print-statement
print(removable_paths)
Ensure functions have docstrings for documentation
missing-docstring
def list_all_removable() -> list[Path]:
Use logging module for better control and configurability
print-statement
print(all_removable_paths)
Ensure functions have docstrings for documentation
missing-docstring
def remove_removable(language: Annotated[str, typer.Option(envvar="LANGUAGE")]) -> None:
Use logging module for better control and configurability
print-statement
print(f"Removed: {path}")
Use logging module for better control and configurability
print-statement
print("Done removing all removable paths")
Ensure functions have docstrings for documentation
missing-docstring
def remove_all_removable() -> None:
Use logging module for better control and configurability
print-statement
print(f"Removed: {removable_path}")
Use logging module for better control and configurability
print-statement
print("Done removing all removable paths")
Ensure functions have docstrings for documentation
missing-docstring
def list_missing(language: str) -> list[Path]:
Avoid unnecessary list conversions; use generators where possible
unnecessary-list
en_lang_paths = list(iter_en_paths_to_translate())
Use logging module for better control and configurability
print-statement
print(missing_paths)
Ensure functions have docstrings for documentation
missing-docstring
def list_outdated(language: str) -> list[Path]:
Avoid unnecessary list conversions; use generators where possible
unnecessary-list
en_lang_paths = list(iter_en_paths_to_translate())
Avoid unnecessary list conversions; use generators where possible
unnecessary-list
en_commit_datetime = list(repo.iter_commits(paths=path, max_count=1))[
Avoid unnecessary list conversions; use generators where possible
unnecessary-list
lang_commit_datetime = list(repo.iter_commits(paths=lang_path, max_count=1))[
Use logging module for better control and configurability
print-statement
print(outdated_paths)
Ensure functions have docstrings for documentation
missing-docstring
def update_outdated(
Use logging module for better control and configurability
print-statement
print(f"Updating lang: {language} path: {path}")
Use logging module for better control and configurability
print-statement
print(f"Done updating: {path}")
Use logging module for better control and configurability
print-statement
print("Done updating all outdated paths")
Ensure functions have docstrings for documentation
missing-docstring
def add_missing(
Use logging module for better control and configurability
print-statement
print(f"Adding lang: {language} path: {path}")
Use logging module for better control and configurability
print-statement
print(f"Done adding: {path}")
Use logging module for better control and configurability
print-statement
print("Done adding all missing paths")
Ensure functions have docstrings for documentation
missing-docstring
def update_and_add(
Use logging module for better control and configurability
print-statement
print(f"Updating outdated translations for {language}")
Use logging module for better control and configurability
print-statement
print(f"Adding missing translations for {language}")
Use logging module for better control and configurability
print-statement
print(f"Done updating and adding for {language}")
Ensure functions have docstrings for documentation
missing-docstring
def make_pr(
Use logging module for better control and configurability
print-statement
print("Setting up GitHub Actions git user")
Use logging module for better control and configurability
print-statement
print("Repository is clean, no changes to commit")
Use logging module for better control and configurability
print-statement
print("Can't commit directly to master")
Use logging module for better control and configurability
print-statement
print(f"Creating a new branch {branch_name}")
Use logging module for better control and configurability
print-statement
print(f"Committing in place on branch {branch_name}")
Use logging module for better control and configurability
print-statement
print("Adding updated files")
Use logging module for better control and configurability
print-statement
print("Committing updated file")
Use logging module for better control and configurability
print-statement
print("Pushing branch")
Use logging module for better control and configurability
print-statement
print("Creating PR")
Use logging module for better control and configurability
print-statement
print(f"Created PR: {pr.number}")
Use logging module for better control and configurability
print-statement
print("Finished")

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.