PageRenderTime 2260ms CodeModel.GetById 43ms RepoModel.GetById 0ms app.codeStats 0ms

/Tools/c-globals/check-c-globals.py

https://github.com/albertz/CPython
Python | 446 lines | 388 code | 50 blank | 8 comment | 60 complexity | d47c3049065b17e1840bed7090000537 MD5 | raw file
  1. from collections import namedtuple
  2. import glob
  3. import os.path
  4. import re
  5. import shutil
  6. import sys
  7. import subprocess
# Baseline verbosity; parse_args() adds one per -v and subtracts one per -q.
VERBOSITY = 2

# Directory layout, derived from the location of this script:
# this directory, its parent, and its grandparent.
C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)

# Default file of names loaded by _read_global_names().
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Sub-directories that find_capi_vars() searches for *.h / *.c files.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches one PyAPI_DATA(...) declaration line.  The single group captures
# the declared name, or several names separated by ", ".
CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')

# Symbol names that find_vars() always drops from its results
# (presumably toolchain-provided symbols -- TODO confirm).
IGNORED_VARS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_LIST__',
    '__JCR_END__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '_edata',
    '_end',
}
  27. def find_capi_vars(root):
  28. capi_vars = {}
  29. for dirname in SOURCE_DIRS:
  30. for filename in glob.glob(os.path.join(ROOT_DIR, dirname, '**/*.[hc]'),
  31. recursive=True):
  32. with open(filename) as file:
  33. for name in _find_capi_vars(file):
  34. if name in capi_vars:
  35. assert not filename.endswith('.c')
  36. assert capi_vars[name].endswith('.c')
  37. capi_vars[name] = filename
  38. return capi_vars
  39. def _find_capi_vars(lines):
  40. for line in lines:
  41. if not line.startswith('PyAPI_DATA'):
  42. continue
  43. assert '{' not in line
  44. match = CAPI_REGEX.match(line)
  45. assert match
  46. names, = match.groups()
  47. for name in names.split(', '):
  48. yield name
  49. def _read_global_names(filename):
  50. # These variables are shared between all interpreters in the process.
  51. with open(filename) as file:
  52. return {line.partition('#')[0].strip()
  53. for line in file
  54. if line.strip() and not line.startswith('#')}
  55. def _is_global_var(name, globalnames):
  56. if _is_autogen_var(name):
  57. return True
  58. if _is_type_var(name):
  59. return True
  60. if _is_module(name):
  61. return True
  62. if _is_exception(name):
  63. return True
  64. if _is_compiler(name):
  65. return True
  66. return name in globalnames
  67. def _is_autogen_var(name):
  68. return (
  69. name.startswith('PyId_') or
  70. '.' in name or
  71. # Objects/typeobject.c
  72. name.startswith('op_id.') or
  73. name.startswith('rop_id.') or
  74. # Python/graminit.c
  75. name.startswith('arcs_') or
  76. name.startswith('states_')
  77. )
  78. def _is_type_var(name):
  79. if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type?
  80. return True
  81. if name.endswith('_desc'): # for structseq types
  82. return True
  83. return (
  84. name.startswith('doc_') or
  85. name.endswith(('_doc', '__doc__', '_docstring')) or
  86. name.endswith('_methods') or
  87. name.endswith('_fields') or
  88. name.endswith(('_memberlist', '_members')) or
  89. name.endswith('_slots') or
  90. name.endswith(('_getset', '_getsets', '_getsetlist')) or
  91. name.endswith('_as_mapping') or
  92. name.endswith('_as_number') or
  93. name.endswith('_as_sequence') or
  94. name.endswith('_as_buffer') or
  95. name.endswith('_as_async')
  96. )
  97. def _is_module(name):
  98. if name.endswith(('_functions', 'Methods', '_Methods')):
  99. return True
  100. if name == 'module_def':
  101. return True
  102. if name == 'initialized':
  103. return True
  104. return name.endswith(('module', '_Module'))
  105. def _is_exception(name):
  106. # Other vars are enumerated in globals-core.txt.
  107. if not name.startswith(('PyExc_', '_PyExc_')):
  108. return False
  109. return name.endswith(('Error', 'Warning'))
  110. def _is_compiler(name):
  111. return (
  112. # Python/Python-ast.c
  113. name.endswith('_type') or
  114. name.endswith('_singleton') or
  115. name.endswith('_attributes')
  116. )
  117. class Var(namedtuple('Var', 'name kind scope capi filename')):
  118. @classmethod
  119. def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
  120. _, _, line = line.partition(' ') # strip off the address
  121. line = line.strip()
  122. kind, _, line = line.partition(' ')
  123. if kind in ignored or ():
  124. return None
  125. elif kind not in expected or ():
  126. raise RuntimeError('unsupported NM type {!r}'.format(kind))
  127. name, _, filename = line.partition('\t')
  128. name = name.strip()
  129. if _is_autogen_var(name):
  130. return None
  131. if _is_global_var(name, globalnames):
  132. scope = 'global'
  133. else:
  134. scope = None
  135. capi = (name in capi_vars or ())
  136. if filename:
  137. filename = os.path.relpath(filename.partition(':')[0])
  138. return cls(name, kind, scope, capi, filename or '~???~')
  139. @property
  140. def external(self):
  141. return self.kind.isupper()
  142. def find_vars(root, globals_filename=GLOBALS_FILE):
  143. python = os.path.join(root, 'python')
  144. if not os.path.exists(python):
  145. raise RuntimeError('python binary missing (need to build it first?)')
  146. capi_vars = find_capi_vars(root)
  147. globalnames = _read_global_names(globals_filename)
  148. nm = shutil.which('nm')
  149. if nm is None:
  150. # XXX Use dumpbin.exe /SYMBOLS on Windows.
  151. raise NotImplementedError
  152. else:
  153. yield from (var
  154. for var in _find_var_symbols(python, nm, capi_vars,
  155. globalnames)
  156. if var.name not in IGNORED_VARS)
  157. NM_FUNCS = set('Tt')
  158. NM_PUBLIC_VARS = set('BD')
  159. NM_PRIVATE_VARS = set('bd')
  160. NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS
  161. NM_DATA = set('Rr')
  162. NM_OTHER = set('ACGgiINpSsuUVvWw-?')
  163. NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER
  164. def _find_var_symbols(python, nm, capi_vars, globalnames):
  165. args = [nm,
  166. '--line-numbers',
  167. python]
  168. out = subprocess.check_output(args)
  169. for line in out.decode('utf-8').splitlines():
  170. var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
  171. if var is None:
  172. continue
  173. yield var
  174. #######################################
  175. class Filter(namedtuple('Filter', 'name op value action')):
  176. @classmethod
  177. def parse(cls, raw):
  178. action = '+'
  179. if raw.startswith(('+', '-')):
  180. action = raw[0]
  181. raw = raw[1:]
  182. # XXX Support < and >?
  183. name, op, value = raw.partition('=')
  184. return cls(name, op, value, action)
  185. def check(self, var):
  186. value = getattr(var, self.name, None)
  187. if not self.op:
  188. matched = bool(value)
  189. elif self.op == '=':
  190. matched = (value == self.value)
  191. else:
  192. raise NotImplementedError
  193. if self.action == '+':
  194. return matched
  195. elif self.action == '-':
  196. return not matched
  197. else:
  198. raise NotImplementedError
  199. def filter_var(var, filters):
  200. for filter in filters:
  201. if not filter.check(var):
  202. return False
  203. return True
  204. def make_sort_key(spec):
  205. columns = [(col.strip('_'), '_' if col.startswith('_') else '')
  206. for col in spec]
  207. def sort_key(var):
  208. return tuple(getattr(var, col).lstrip(prefix)
  209. for col, prefix in columns)
  210. return sort_key
  211. def make_groups(allvars, spec):
  212. group = spec
  213. groups = {}
  214. for var in allvars:
  215. value = getattr(var, group)
  216. key = '{}: {}'.format(group, value)
  217. try:
  218. groupvars = groups[key]
  219. except KeyError:
  220. groupvars = groups[key] = []
  221. groupvars.append(var)
  222. return groups
  223. def format_groups(groups, columns, fmts, widths):
  224. for group in sorted(groups):
  225. groupvars = groups[group]
  226. yield '', 0
  227. yield ' # {}'.format(group), 0
  228. yield from format_vars(groupvars, columns, fmts, widths)
  229. def format_vars(allvars, columns, fmts, widths):
  230. fmt = ' '.join(fmts[col] for col in columns)
  231. fmt = ' ' + fmt.replace(' ', ' ') + ' ' # for div margin
  232. header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
  233. yield header, 0
  234. div = ' '.join('-'*(widths[col]+2) for col in columns)
  235. yield div, 0
  236. for var in allvars:
  237. values = (getattr(var, col) for col in columns)
  238. row = fmt.format(*('X' if val is True else val or ''
  239. for val in values))
  240. yield row, 1
  241. yield div, 0
  242. #######################################
  243. COLUMNS = 'name,external,capi,scope,filename'
  244. COLUMN_NAMES = COLUMNS.split(',')
  245. COLUMN_WIDTHS = {col: len(col)
  246. for col in COLUMN_NAMES}
  247. COLUMN_WIDTHS.update({
  248. 'name': 50,
  249. 'scope': 7,
  250. 'filename': 40,
  251. })
  252. COLUMN_FORMATS = {col: '{:%s}' % width
  253. for col, width in COLUMN_WIDTHS.items()}
  254. for col in COLUMN_FORMATS:
  255. if COLUMN_WIDTHS[col] == len(col):
  256. COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^')
  257. def _parse_filters_arg(raw, error):
  258. filters = []
  259. for value in raw.split(','):
  260. value=value.strip()
  261. if not value:
  262. continue
  263. try:
  264. filter = Filter.parse(value)
  265. if filter.name not in COLUMN_NAMES:
  266. raise Exception('unsupported column {!r}'.format(filter.name))
  267. except Exception as e:
  268. error('bad filter {!r}: {}'.format(raw, e))
  269. filters.append(filter)
  270. return filters
  271. def _parse_columns_arg(raw, error):
  272. columns = raw.split(',')
  273. for column in columns:
  274. if column not in COLUMN_NAMES:
  275. error('unsupported column {!r}'.format(column))
  276. return columns
  277. def _parse_sort_arg(raw, error):
  278. sort = raw.split(',')
  279. for column in sort:
  280. if column.lstrip('_') not in COLUMN_NAMES:
  281. error('unsupported column {!r}'.format(column))
  282. return sort
  283. def _parse_group_arg(raw, error):
  284. if not raw:
  285. return raw
  286. group = raw
  287. if group not in COLUMN_NAMES:
  288. error('unsupported column {!r}'.format(group))
  289. if group != 'filename':
  290. error('unsupported group {!r}'.format(group))
  291. return group
  292. def parse_args(argv=None):
  293. if argv is None:
  294. argv = sys.argv[1:]
  295. import argparse
  296. parser = argparse.ArgumentParser()
  297. parser.add_argument('-v', '--verbose', action='count', default=0)
  298. parser.add_argument('-q', '--quiet', action='count', default=0)
  299. parser.add_argument('--filters', default='-scope',
  300. help='[[-]<COLUMN>[=<GLOB>]] ...')
  301. parser.add_argument('--columns', default=COLUMNS,
  302. help='a comma-separated list of columns to show')
  303. parser.add_argument('--sort', default='filename,_name',
  304. help='a comma-separated list of columns to sort')
  305. parser.add_argument('--group',
  306. help='group by the given column name (- to not group)')
  307. parser.add_argument('--rc-on-match', dest='rc', type=int)
  308. parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)
  309. args = parser.parse_args(argv)
  310. verbose = vars(args).pop('verbose', 0)
  311. quiet = vars(args).pop('quiet', 0)
  312. args.verbosity = max(0, VERBOSITY + verbose - quiet)
  313. if args.sort.startswith('filename') and not args.group:
  314. args.group = 'filename'
  315. if args.rc is None:
  316. if '-scope=core' in args.filters or 'core' not in args.filters:
  317. args.rc = 0
  318. else:
  319. args.rc = 1
  320. args.filters = _parse_filters_arg(args.filters, parser.error)
  321. args.columns = _parse_columns_arg(args.columns, parser.error)
  322. args.sort = _parse_sort_arg(args.sort, parser.error)
  323. args.group = _parse_group_arg(args.group, parser.error)
  324. return args
  325. def main(root=ROOT_DIR, filename=GLOBALS_FILE,
  326. filters=None, columns=COLUMN_NAMES, sort=None, group=None,
  327. verbosity=VERBOSITY, rc=1):
  328. log = lambda msg: ...
  329. if verbosity >= 2:
  330. log = lambda msg: print(msg)
  331. allvars = (var
  332. for var in find_vars(root, filename)
  333. if filter_var(var, filters))
  334. if sort:
  335. allvars = sorted(allvars, key=make_sort_key(sort))
  336. if group:
  337. try:
  338. columns.remove(group)
  339. except ValueError:
  340. pass
  341. grouped = make_groups(allvars, group)
  342. lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
  343. else:
  344. lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
  345. total = 0
  346. for line, count in lines:
  347. total += count
  348. log(line)
  349. log('\ntotal: {}'.format(total))
  350. if total and rc:
  351. print('ERROR: found unsafe globals', file=sys.stderr)
  352. return rc
  353. return 0
  354. if __name__ == '__main__':
  355. args = parse_args()
  356. sys.exit(
  357. main(**vars(args)))