PageRenderTime 58ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/generate-sysdig-event.py

https://gitlab.com/jvelando/wireshark
Python | 380 lines | 288 code | 49 blank | 43 comment | 62 complexity | 7de8a3a9db5a37b301613dbb8bc34d88 MD5 | raw file
  1. #!/usr/bin/env python3
  2. #
  3. # Wireshark - Network traffic analyzer
  4. # By Gerald Combs <gerald@wireshark.org>
  5. # Copyright 1998 Gerald Combs
  6. #
  7. # SPDX-License-Identifier: GPL-2.0-or-later
  8. #
  9. '''\
  10. Generate Sysdig event dissector sections from the sysdig sources.
  11. Reads driver/event_table.c and driver/ppm_events_public.h and generates
  12. corresponding dissection code in packet-sysdig-event.c. Updates are
  13. performed in-place in the dissector code.
  14. Requires an Internet connection. Assets are loaded from GitHub over HTTPS, from falcosecurity/libs master.
  15. '''
  16. import logging
  17. import os
  18. import os.path
  19. import re
  20. import urllib.request, urllib.error, urllib.parse
  21. import sys
# Base URL of the raw falcosecurity/libs master tree on GitHub. The
# repository-relative paths of the driver sources are appended to it.
sysdig_repo_pfx = 'https://raw.githubusercontent.com/falcosecurity/libs/master/'
  23. def exit_msg(msg=None, status=1):
  24. if msg is not None:
  25. sys.stderr.write(msg + '\n\n')
  26. sys.stderr.write(__doc__ + '\n')
  27. sys.exit(status)
  28. def get_url_lines(url):
  29. '''Open a URL.
  30. Returns the URL body as a list of lines.
  31. '''
  32. req_headers = { 'User-Agent': 'Wireshark generate-sysdig-event' }
  33. try:
  34. req = urllib.request.Request(url, headers=req_headers)
  35. response = urllib.request.urlopen(req)
  36. lines = response.read().decode().splitlines()
  37. response.close()
  38. except urllib.error.HTTPError as err:
  39. exit_msg("HTTP error fetching {0}: {1}".format(url, err.reason))
  40. except urllib.error.URLError as err:
  41. exit_msg("URL error fetching {0}: {1}".format(url, err.reason))
  42. except OSError as err:
  43. exit_msg("OS error fetching {0}".format(url, err.strerror))
  44. except Exception:
  45. exit_msg("Unexpected error:", sys.exc_info()[0])
  46. return lines
# Lines of driver/ppm_events_public.h. Fetched over the network as soon as
# this module is executed; get_url_lines() exits the script on failure.
ppm_ev_pub_lines = get_url_lines(sysdig_repo_pfx + 'driver/ppm_events_public.h')
  48. ppme_re = re.compile('^\s+PPME_([A-Z0-9_]+_[EX])\s*=\s*([0-9]+)\s*,')
  49. event_info_d = {}
  50. def get_event_defines():
  51. event_d = {}
  52. for line in ppm_ev_pub_lines:
  53. m = ppme_re.match(line)
  54. if m:
  55. event_d[int(m.group(2))] = m.group(1)
  56. return event_d
# Lines of driver/event_table.c. Fetched over the network as soon as this
# module is executed; get_url_lines() exits the script on failure.
ppm_ev_table_lines = get_url_lines(sysdig_repo_pfx + 'driver/event_table.c')
  58. hf_d = {}
  59. event_info_re = re.compile('^\s+/\*\s*PPME_.*\*\/\s*{\s*"([A-Za-z0-9_]+)"\s*,[^,]+,[^,]+,\s*([0-9]+)\s*[,{}]')
  60. event_param_re = re.compile('{\s*"([A-Za-z0-9_ ]+)"\s*,\s*PT_([A-Z0-9_]+)\s*,\s*PF_([A-Z0-9_]+)\s*[,}]')
  61. def get_event_names():
  62. '''Return a contiguous list of event names. Names are lower case.'''
  63. event_name_l = []
  64. for line in ppm_ev_table_lines:
  65. ei = event_info_re.match(line)
  66. if ei:
  67. event_name_l.append(ei.group(1))
  68. return event_name_l
# PT_xxx to FT_xxx
# Explicit field type for the listed sysdig parameter types (PT_ prefix
# removed). Types that are not listed are resolved by the fallback rules in
# get_event_params().
pt_to_ft = {
    'BYTEBUF': 'BYTES',
    'CHARBUF': 'STRING',
    'FD': 'INT64',
    'FSPATH': 'STRING',
}

# FT_xxx to BASE_xxx
# Keys are regular expressions that get_event_params() applies to the field
# type with re.match(); when one matches, the display format is forced to
# the associated value.
force_param_formats = {
    'STRING': 'NONE',
    'INT.*': 'DEC',
}
  81. def get_event_params():
  82. '''Return a list of dictionaries containing event names and parameter info.'''
  83. event_param_l = []
  84. event_num = 0
  85. force_string_l = ['args', 'env']
  86. for line in ppm_ev_table_lines:
  87. ei = event_info_re.match(line)
  88. ep = event_param_re.findall(line)
  89. if ei and ep:
  90. event_name = ei.group(1)
  91. src_param_count = int(ei.group(2))
  92. if len(ep) != src_param_count:
  93. err_msg = '{}: found {} parameters. Expected {}. Params: {}'.format(
  94. event_name, len(ep), src_param_count, repr(ep))
  95. if len(ep) > src_param_count:
  96. logging.warning(err_msg)
  97. del ep[src_param_count:]
  98. else:
  99. raise NameError(err_msg)
  100. for p in ep:
  101. if p[0] in force_string_l:
  102. param_type = 'STRING'
  103. elif p[1] in pt_to_ft:
  104. param_type = pt_to_ft[p[1]]
  105. elif p[0] == 'flags' and p[1].startswith('INT') and 'HEX' in p[2]:
  106. param_type = 'U' + p[1]
  107. elif 'INT' in p[1]:
  108. # Ints
  109. param_type = p[1]
  110. else:
  111. # Fall back to bytes
  112. param_type = 'BYTES'
  113. if p[2] == 'NA':
  114. if 'INT' in param_type:
  115. param_format = 'DEC'
  116. else:
  117. param_format = 'NONE'
  118. elif param_type == 'BYTES':
  119. param_format = 'NONE'
  120. else:
  121. param_format = p[2]
  122. for pt_pat, force_pf in force_param_formats.items():
  123. if re.match(pt_pat, param_type) and param_format != force_pf:
  124. err_msg = 'Forcing {} {} format to {}. Params: {}'.format(
  125. event_name, param_type, force_pf, repr(ep))
  126. logging.warning(err_msg)
  127. param_format = force_pf
  128. param_d = {
  129. 'event_name': event_name,
  130. 'event_num': event_num,
  131. # use replace() to account for "plugin ID" param name (ie: param names with space)
  132. 'param_name': p[0].replace(" ", "_"),
  133. 'param_type': param_type,
  134. 'param_format': param_format,
  135. }
  136. event_param_l.append(param_d)
  137. if ei:
  138. event_num += 1
  139. return event_param_l
  140. def param_to_hf_name(param):
  141. return 'hf_param_{}_{}'.format(param['param_name'], param['param_type'].lower())
  142. def param_to_value_string_name(param):
  143. return '{}_{}_vals'.format(param['param_name'], param['param_type'].lower())
  144. def get_param_desc(param):
  145. # Try to coerce event names and parameters into human-friendly
  146. # strings.
  147. # XXX This could use some work.
  148. # Specific descriptions. Event name + parameter name.
  149. param_descs = {
  150. 'accept.queuepct': 'Accept queue per connection',
  151. 'execve.args': 'Program arguments',
  152. 'execve.comm': 'Command',
  153. 'execve.cwd': 'Current working directory',
  154. }
  155. # General descriptions. Event name only.
  156. event_descs = {
  157. 'ioctl': 'I/O control',
  158. }
  159. event_name = param['event_name']
  160. param_id = '{}.{}'.format(event_name, param['param_name'])
  161. if param_id in param_descs:
  162. param_desc = param_descs[param_id]
  163. elif event_name in event_descs:
  164. param_desc = '{}: {}'.format(event_descs[event_name], param['param_name'])
  165. else:
  166. param_desc = param['param_name']
  167. return param_desc
  168. def main():
  169. logging.basicConfig(format='%(levelname)s: %(message)s')
  170. # Event list
  171. event_d = get_event_defines()
  172. event_nums = list(event_d.keys())
  173. event_nums.sort()
  174. event_name_l = get_event_names()
  175. event_param_l = get_event_params()
  176. hf_d = {}
  177. for param in event_param_l:
  178. hf_name = param_to_hf_name(param)
  179. hf_d[hf_name] = param
  180. idx_id_to_name = { '': 'no' }
  181. parameter_index_l = []
  182. for en in range (0, len(event_nums)):
  183. param_id = ''
  184. param_l = []
  185. event_var = event_d[en].lower()
  186. for param in event_param_l:
  187. if param['event_num'] == en:
  188. hf_name = param_to_hf_name(param)
  189. param_l.append(hf_name)
  190. param_id += ':' + param['param_name'] + '_' + param['param_type']
  191. ei_str = ''
  192. if param_id not in idx_id_to_name:
  193. idx_id_to_name[param_id] = event_var
  194. ei_str = 'static int * const {}_indexes[] = {{ &{}, NULL }};'.format(
  195. event_var,
  196. ', &'.join(param_l)
  197. )
  198. else:
  199. ei_str = '#define {}_indexes {}_indexes'.format(event_var, idx_id_to_name[param_id])
  200. parameter_index_l.append(ei_str)
  201. dissector_path = os.path.join(os.path.dirname(__file__),
  202. '..', 'epan', 'dissectors', 'packet-sysdig-event.c')
  203. dissector_f = open(dissector_path, 'r')
  204. dissector_lines = list(dissector_f)
  205. dissector_f = open(dissector_path, 'w+')
  206. # Strip out old content
  207. strip_re_l = []
  208. strip_re_l.append(re.compile('^static\s+int\s+hf_param_.*;'))
  209. strip_re_l.append(re.compile('^#define\s+EVT_STR_[A-Z0-9_]+\s+"[A-Za-z0-9_]+"'))
  210. strip_re_l.append(re.compile('^#define\s+EVT_[A-Z0-9_]+\s+[0-9]+'))
  211. strip_re_l.append(re.compile('^\s*{\s*EVT_[A-Z0-9_]+\s*,\s*EVT_STR_[A-Z0-9_]+\s*}'))
  212. strip_re_l.append(re.compile('^static\s+const\s+int\s+\*\s*[a-z0-9_]+_[ex]_indexes\[\]\s*=\s*\{\s*&hf_param_.*NULL\s*\}\s*;'))
  213. strip_re_l.append(re.compile('^static\s+int\s*\*\s+const\s+[a-z0-9_]+_[ex]_indexes\[\]\s*=\s*\{\s*&hf_param_.*NULL\s*\}\s*;'))
  214. strip_re_l.append(re.compile('^\s*#define\s+[a-z0-9_]+_[ex]_indexes\s+[a-z0-9_]+_indexes'))
  215. strip_re_l.append(re.compile('^\s*\{\s*EVT_[A-Z0-9_]+_[EX]\s*,\s*[a-z0-9_]+_[ex]_indexes\s*}\s*,'))
  216. strip_re_l.append(re.compile('^\s*{\s*&hf_param_.*},')) # Must all be on one line
  217. for strip_re in strip_re_l:
  218. dissector_lines = [l for l in dissector_lines if not strip_re.search(l)]
  219. # Find our value strings
  220. value_string_re = re.compile('static\s+const\s+value_string\s+([A-Za-z0-9_]+_vals)')
  221. value_string_l = []
  222. for line in dissector_lines:
  223. vs = value_string_re.match(line)
  224. if vs:
  225. value_string_l.append(vs.group(1))
  226. # Add in new content after comments.
  227. header_fields_c = 'Header fields'
  228. header_fields_re = re.compile('/\*\s+' + header_fields_c, flags = re.IGNORECASE)
  229. header_fields_l = []
  230. for hf_name in sorted(hf_d.keys()):
  231. header_fields_l.append('static int {} = -1;'.format(hf_name))
  232. event_names_c = 'Event names'
  233. event_names_re = re.compile('/\*\s+' + event_names_c, flags = re.IGNORECASE)
  234. event_names_l = []
  235. event_str_l = list(set(event_name_l))
  236. event_str_l.sort()
  237. for evt_str in event_str_l:
  238. event_names_l.append('#define EVT_STR_{0:24s} "{1:s}"'.format(evt_str.upper(), evt_str))
  239. event_definitions_c = 'Event definitions'
  240. event_definitions_re = re.compile('/\*\s+' + event_definitions_c, flags = re.IGNORECASE)
  241. event_definitions_l = []
  242. for evt in event_nums:
  243. event_definitions_l.append('#define EVT_{0:24s} {1:3d}'.format(event_d[evt], evt))
  244. value_strings_c = 'Value strings'
  245. value_strings_re = re.compile('/\*\s+' + value_strings_c, flags = re.IGNORECASE)
  246. value_strings_l = []
  247. for evt in event_nums:
  248. evt_num = 'EVT_{},'.format(event_d[evt])
  249. evt_str = 'EVT_STR_' + event_name_l[evt].upper()
  250. value_strings_l.append(' {{ {0:<32s} {1:s} }},'.format(evt_num, evt_str))
  251. parameter_index_c = 'Parameter indexes'
  252. parameter_index_re = re.compile('/\*\s+' + parameter_index_c, flags = re.IGNORECASE)
  253. # parameter_index_l defined above.
  254. event_tree_c = 'Event tree'
  255. event_tree_re = re.compile('/\*\s+' + event_tree_c, flags = re.IGNORECASE)
  256. event_tree_l = []
  257. for evt in event_nums:
  258. evt_num = 'EVT_{}'.format(event_d[evt])
  259. evt_idx = '{}_indexes'.format(event_d[evt].lower())
  260. event_tree_l.append(' {{ {}, {} }},'.format(evt_num, evt_idx))
  261. header_field_reg_c = 'Header field registration'
  262. header_field_reg_re = re.compile('/\*\s+' + header_field_reg_c, flags = re.IGNORECASE)
  263. header_field_reg_l = []
  264. for hf_name in sorted(hf_d.keys()):
  265. param = hf_d[hf_name]
  266. event_name = param['event_name']
  267. param_desc = get_param_desc(param)
  268. param_name = param['param_name']
  269. param_type = param['param_type']
  270. param_format = param['param_format']
  271. fieldconvert = 'NULL'
  272. vs_name = param_to_value_string_name(param)
  273. if vs_name in value_string_l and 'INT' in param_type:
  274. fieldconvert = 'VALS({})'.format(vs_name)
  275. header_field_reg_l.append(' {{ &{}, {{ "{}", "sysdig.param.{}.{}", FT_{}, BASE_{}, {}, 0, NULL, HFILL }} }},'.format(
  276. hf_name,
  277. param_desc,
  278. event_name,
  279. param_name,
  280. param_type,
  281. param_format,
  282. fieldconvert
  283. ))
  284. for line in dissector_lines:
  285. fill_comment = None
  286. fill_l = []
  287. if header_fields_re.match(line):
  288. fill_comment = header_fields_c
  289. fill_l = header_fields_l
  290. elif event_names_re.match(line):
  291. fill_comment = event_names_c
  292. fill_l = event_names_l
  293. elif event_definitions_re.match(line):
  294. fill_comment = event_definitions_c
  295. fill_l = event_definitions_l
  296. elif value_strings_re.match(line):
  297. fill_comment = value_strings_c
  298. fill_l = value_strings_l
  299. elif parameter_index_re.match(line):
  300. fill_comment = parameter_index_c
  301. fill_l = parameter_index_l
  302. elif event_tree_re.match(line):
  303. fill_comment = event_tree_c
  304. fill_l = event_tree_l
  305. elif header_field_reg_re.match(line):
  306. fill_comment = header_field_reg_c
  307. fill_l = header_field_reg_l
  308. if fill_comment is not None:
  309. # Write our comment followed by the content
  310. print(('Generating {}, {:d} lines'.format(fill_comment, len(fill_l))))
  311. dissector_f.write('/* {}. Automatically generated by tools/{} */\n'.format(
  312. fill_comment,
  313. os.path.basename(__file__)
  314. ))
  315. for line in fill_l:
  316. dissector_f.write('{}\n'.format(line))
  317. # Fill each section only once
  318. del fill_l[:]
  319. else:
  320. # Existing content
  321. dissector_f.write(line)
  322. dissector_f.close()
#
# On with the show
#
# Run the generator only when executed as a script. main() has no return
# statement, so sys.exit() receives None and a normal run exits with
# status 0.
if __name__ == "__main__":
    sys.exit(main())