PageRenderTime 52ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/doc/translations/extract.py

https://gitlab.com/godotengine/godot
Python | 310 lines | 303 code | 4 blank | 3 comment | 6 complexity | c82533b832beb04263cdc9ff2ac21428 MD5 | raw file
  1. #!/usr/bin/env python3
import argparse
import os
import shutil
import subprocess
import sys
from collections import OrderedDict
  6. EXTRACT_TAGS = ["description", "brief_description", "member", "constant", "theme_item", "link"]
  7. HEADER = """\
  8. # LANGUAGE translation of the Godot Engine class reference.
  9. # Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.
  10. # Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).
  11. # This file is distributed under the same license as the Godot source code.
  12. #
  13. # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
  14. #
  15. #, fuzzy
  16. msgid ""
  17. msgstr ""
  18. "Project-Id-Version: Godot Engine class reference\\n"
  19. "Report-Msgid-Bugs-To: https://github.com/godotengine/godot\\n"
  20. "MIME-Version: 1.0\\n"
  21. "Content-Type: text/plain; charset=UTF-8\\n"
  22. "Content-Transfer-Encoding: 8-bit\\n"
  23. """
  24. # Some strings used by make_rst.py are normally part of the editor translations,
  25. # so we need to include them manually here for the online docs.
  26. BASE_STRINGS = [
  27. "Description",
  28. "Tutorials",
  29. "Properties",
  30. "Constructors",
  31. "Methods",
  32. "Operators",
  33. "Theme Properties",
  34. "Signals",
  35. "Enumerations",
  36. "Constants",
  37. "Property Descriptions",
  38. "Constructor Descriptions",
  39. "Method Descriptions",
  40. "Operator Descriptions",
  41. "Theme Property Descriptions",
  42. "Inherits:",
  43. "Inherited By:",
  44. "(overrides %s)",
  45. "Default",
  46. "Setter",
  47. "value",
  48. "Getter",
  49. "This method should typically be overridden by the user to have any effect.",
  50. "This method has no side effects. It doesn't modify any of the instance's member variables.",
  51. "This method accepts any number of arguments after the ones described here.",
  52. "This method is used to construct a type.",
  53. "This method doesn't need an instance to be called, so it can be called directly using the class name.",
  54. "This method describes a valid operator to use with this type as left-hand operand.",
  55. ]
  56. ## <xml-line-number-hack from="https://stackoverflow.com/a/36430270/10846399">
  57. import sys
  58. sys.modules["_elementtree"] = None
  59. import xml.etree.ElementTree as ET
  60. ## override the parser to get the line number
  61. class LineNumberingParser(ET.XMLParser):
  62. def _start(self, *args, **kwargs):
  63. ## Here we assume the default XML parser which is expat
  64. ## and copy its element position attributes into output Elements
  65. element = super(self.__class__, self)._start(*args, **kwargs)
  66. element._start_line_number = self.parser.CurrentLineNumber
  67. element._start_column_number = self.parser.CurrentColumnNumber
  68. element._start_byte_index = self.parser.CurrentByteIndex
  69. return element
  70. def _end(self, *args, **kwargs):
  71. element = super(self.__class__, self)._end(*args, **kwargs)
  72. element._end_line_number = self.parser.CurrentLineNumber
  73. element._end_column_number = self.parser.CurrentColumnNumber
  74. element._end_byte_index = self.parser.CurrentByteIndex
  75. return element
  76. ## </xml-line-number-hack>
  77. class Desc:
  78. def __init__(self, line_no, msg, desc_list=None):
  79. ## line_no : the line number where the desc is
  80. ## msg : the description string
  81. ## desc_list : the DescList it belongs to
  82. self.line_no = line_no
  83. self.msg = msg
  84. self.desc_list = desc_list
  85. class DescList:
  86. def __init__(self, doc, path):
  87. ## doc : root xml element of the document
  88. ## path : file path of the xml document
  89. ## list : list of Desc objects for this document
  90. self.doc = doc
  91. self.path = path
  92. self.list = []
  93. def print_error(error):
  94. print("ERROR: {}".format(error))
  95. ## build classes with xml elements recursively
  96. def _collect_classes_dir(path, classes):
  97. if not os.path.isdir(path):
  98. print_error("Invalid directory path: {}".format(path))
  99. exit(1)
  100. for _dir in map(lambda dir: os.path.join(path, dir), os.listdir(path)):
  101. if os.path.isdir(_dir):
  102. _collect_classes_dir(_dir, classes)
  103. elif os.path.isfile(_dir):
  104. if not _dir.endswith(".xml"):
  105. # print("Got non-.xml file '{}', skipping.".format(path))
  106. continue
  107. _collect_classes_file(_dir, classes)
  108. ## opens a file and parse xml add to classes
  109. def _collect_classes_file(path, classes):
  110. if not os.path.isfile(path) or not path.endswith(".xml"):
  111. print_error("Invalid xml file path: {}".format(path))
  112. exit(1)
  113. print("Collecting file: {}".format(os.path.basename(path)))
  114. try:
  115. tree = ET.parse(path, parser=LineNumberingParser())
  116. except ET.ParseError as e:
  117. print_error("Parse error reading file '{}': {}".format(path, e))
  118. exit(1)
  119. doc = tree.getroot()
  120. if "name" in doc.attrib:
  121. if "version" not in doc.attrib:
  122. print_error("Version missing from 'doc', file: {}".format(path))
  123. name = doc.attrib["name"]
  124. if name in classes:
  125. print_error("Duplicate class {} at path {}".format(name, path))
  126. exit(1)
  127. classes[name] = DescList(doc, path)
  128. else:
  129. print_error("Unknown XML file {}, skipping".format(path))
  130. ## regions are list of tuples with size 3 (start_index, end_index, indent)
  131. ## indication in string where the codeblock starts, ends, and it's indent
  132. ## if i inside the region returns the indent, else returns -1
  133. def _get_xml_indent(i, regions):
  134. for region in regions:
  135. if region[0] < i < region[1]:
  136. return region[2]
  137. return -1
  138. ## find and build all regions of codeblock which we need later
  139. def _make_codeblock_regions(desc, path=""):
  140. code_block_end = False
  141. code_block_index = 0
  142. code_block_regions = []
  143. while not code_block_end:
  144. code_block_index = desc.find("[codeblock]", code_block_index)
  145. if code_block_index < 0:
  146. break
  147. xml_indent = 0
  148. while True:
  149. ## [codeblock] always have a trailing new line and some tabs
  150. ## those tabs are belongs to xml indentations not code indent
  151. if desc[code_block_index + len("[codeblock]\n") + xml_indent] == "\t":
  152. xml_indent += 1
  153. else:
  154. break
  155. end_index = desc.find("[/codeblock]", code_block_index)
  156. if end_index < 0:
  157. print_error("Non terminating codeblock: {}".format(path))
  158. exit(1)
  159. code_block_regions.append((code_block_index, end_index, xml_indent))
  160. code_block_index += 1
  161. return code_block_regions
  162. def _strip_and_split_desc(desc, code_block_regions):
  163. desc_strip = "" ## a stripped desc msg
  164. total_indent = 0 ## code indent = total indent - xml indent
  165. for i in range(len(desc)):
  166. c = desc[i]
  167. if c == "\n":
  168. c = "\\n"
  169. if c == '"':
  170. c = '\\"'
  171. if c == "\\":
  172. c = "\\\\" ## <element \> is invalid for msgmerge
  173. if c == "\t":
  174. xml_indent = _get_xml_indent(i, code_block_regions)
  175. if xml_indent >= 0:
  176. total_indent += 1
  177. if xml_indent < total_indent:
  178. c = "\\t"
  179. else:
  180. continue
  181. else:
  182. continue
  183. desc_strip += c
  184. if c == "\\n":
  185. total_indent = 0
  186. return desc_strip
  187. ## make catalog strings from xml elements
  188. def _make_translation_catalog(classes):
  189. unique_msgs = OrderedDict()
  190. for class_name in classes:
  191. desc_list = classes[class_name]
  192. for elem in desc_list.doc.iter():
  193. if elem.tag in EXTRACT_TAGS:
  194. elem_text = elem.text
  195. if elem.tag == "link":
  196. elem_text = elem.attrib["title"] if "title" in elem.attrib else ""
  197. if not elem_text or len(elem_text) == 0:
  198. continue
  199. line_no = elem._start_line_number if elem_text[0] != "\n" else elem._start_line_number + 1
  200. desc_str = elem_text.strip()
  201. code_block_regions = _make_codeblock_regions(desc_str, desc_list.path)
  202. desc_msg = _strip_and_split_desc(desc_str, code_block_regions)
  203. desc_obj = Desc(line_no, desc_msg, desc_list)
  204. desc_list.list.append(desc_obj)
  205. if desc_msg not in unique_msgs:
  206. unique_msgs[desc_msg] = [desc_obj]
  207. else:
  208. unique_msgs[desc_msg].append(desc_obj)
  209. return unique_msgs
  210. ## generate the catalog file
  211. def _generate_translation_catalog_file(unique_msgs, output, location_line=False):
  212. with open(output, "w", encoding="utf8") as f:
  213. f.write(HEADER)
  214. for msg in BASE_STRINGS:
  215. f.write("#: doc/tools/make_rst.py\n")
  216. f.write('msgid "{}"\n'.format(msg))
  217. f.write('msgstr ""\n\n')
  218. for msg in unique_msgs:
  219. if len(msg) == 0 or msg in BASE_STRINGS:
  220. continue
  221. f.write("#:")
  222. desc_list = unique_msgs[msg]
  223. for desc in desc_list:
  224. path = desc.desc_list.path.replace("\\", "/")
  225. if path.startswith("./"):
  226. path = path[2:]
  227. if location_line: # Can be skipped as diffs on line numbers are spammy.
  228. f.write(" {}:{}".format(path, desc.line_no))
  229. else:
  230. f.write(" {}".format(path))
  231. f.write("\n")
  232. f.write('msgid "{}"\n'.format(msg))
  233. f.write('msgstr ""\n\n')
  234. ## TODO: what if 'nt'?
  235. if os.name == "posix":
  236. print("Wrapping template at 79 characters for compatibility with Weblate.")
  237. os.system("msgmerge -w79 {0} {0} > {0}.wrap".format(output))
  238. shutil.move("{}.wrap".format(output), output)
  239. def main():
  240. parser = argparse.ArgumentParser()
  241. parser.add_argument(
  242. "--path", "-p", nargs="+", default=".", help="The directory or directories containing XML files to collect."
  243. )
  244. parser.add_argument("--output", "-o", default="translation_catalog.pot", help="The path to the output file.")
  245. args = parser.parse_args()
  246. output = os.path.abspath(args.output)
  247. if not os.path.isdir(os.path.dirname(output)) or not output.endswith(".pot"):
  248. print_error("Invalid output path: {}".format(output))
  249. exit(1)
  250. classes = OrderedDict()
  251. for path in args.path:
  252. if not os.path.isdir(path):
  253. print_error("Invalid working directory path: {}".format(path))
  254. exit(1)
  255. print("\nCurrent working dir: {}".format(path))
  256. path_classes = OrderedDict() ## dictionary of key=class_name, value=DescList objects
  257. _collect_classes_dir(path, path_classes)
  258. classes.update(path_classes)
  259. classes = OrderedDict(sorted(classes.items(), key=lambda kv: kv[0].lower()))
  260. unique_msgs = _make_translation_catalog(classes)
  261. _generate_translation_catalog_file(unique_msgs, output)
  262. if __name__ == "__main__":
  263. main()