PageRenderTime 107ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/socorro/collector/submitter_app.py

https://github.com/luser/socorro
Python | 240 lines | 236 code | 0 blank | 4 comment | 3 complexity | eefb63240fce30fd308e5c95515895ec MD5 | raw file
  1. #! /usr/bin/env python
  2. # This Source Code Form is subject to the terms of the Mozilla Public
  3. # License, v. 2.0. If a copy of the MPL was not distributed with this
  4. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
  5. """this app will submit crashes to a socorro collector"""
  6. import time
  7. import json
  8. from os import (
  9. path,
  10. listdir
  11. )
  12. from configman import Namespace
  13. from socorrolib.app.fetch_transform_save_app import (
  14. FetchTransformSaveWithSeparateNewCrashSourceApp,
  15. main
  16. )
  17. from socorro.external.crashstorage_base import (
  18. CrashStorageBase,
  19. FileDumpsMapping,
  20. )
  21. from socorro.external.fs.filesystem import findFileGenerator
  22. from socorrolib.lib.util import DotDict
  23. #==============================================================================
  24. class SubmitterFileSystemWalkerSource(CrashStorageBase):
  25. """This is a crashstorage derivative that can walk an arbitrary file
  26. system path looking for crashes. The new_crashes generator yields
  27. pathnames rather than crash_ids - so it is not compatible with other
  28. instances of the CrashStorageSystem."""
  29. required_config = Namespace()
  30. required_config.add_option(
  31. 'search_root',
  32. doc="a filesystem location to begin a search for raw crash/dump sets",
  33. short_form='s',
  34. default=None
  35. )
  36. required_config.add_option(
  37. 'dump_suffix',
  38. doc="the standard file extension for dumps",
  39. default='.dump'
  40. )
  41. required_config.add_option(
  42. 'dump_field',
  43. doc="the default name for the main dump",
  44. default='upload_file_minidump'
  45. )
  46. #--------------------------------------------------------------------------
  47. def __init__(self, config, quit_check_callback=None):
  48. if isinstance(quit_check_callback, basestring):
  49. # this class is being used as a 'new_crash_source' and the name
  50. # of the app has been passed - we can ignore it
  51. quit_check_callback = None
  52. super(SubmitterFileSystemWalkerSource, self).__init__(
  53. config,
  54. quit_check_callback
  55. )
  56. #--------------------------------------------------------------------------
  57. def get_raw_crash(self, (prefix, path_tuple)):
  58. """the default implemntation of fetching a raw_crash
  59. parameters:
  60. path_tuple - a tuple of paths. the first element is the raw_crash
  61. pathname"""
  62. with open(path_tuple[0]) as raw_crash_fp:
  63. return DotDict(json.load(raw_crash_fp))
  64. #--------------------------------------------------------------------------
  65. def get_unredacted_processed(self, (prefix, path_tuple)):
  66. """the default implemntation of fetching a processed_crash
  67. parameters:
  68. path_tuple - a tuple of paths. the first element is the raw_crash
  69. pathname"""
  70. with open(path_tuple[0]) as processed_crash_fp:
  71. return DotDict(json.load(processed_crash_fp))
  72. #--------------------------------------------------------------------------
  73. def get_raw_dumps(self, prefix_path_tuple):
  74. file_dumps_mapping = self.get_raw_dumps_as_files(prefix_path_tuple)
  75. return file_dumps_mapping.as_memory_dumps_mapping()
  76. #--------------------------------------------------------------------------
  77. def get_raw_dumps_as_files(self, prefix_path_tuple):
  78. """the default implemntation of fetching a dump.
  79. parameters:
  80. dump_pathnames - a tuple of paths. the second element and beyond are
  81. the dump pathnames"""
  82. prefix, dump_pathnames = prefix_path_tuple
  83. return FileDumpsMapping(
  84. zip(
  85. self._dump_names_from_pathnames(dump_pathnames[1:]),
  86. dump_pathnames[1:]
  87. )
  88. )
  89. #--------------------------------------------------------------------------
  90. def _dump_names_from_pathnames(self, pathnames):
  91. """Given a list of pathnames of this form:
  92. (uuid[.name].dump)+
  93. This function will return a list of just the name part of the path.
  94. in the case where there is no name, it will use the default dump
  95. name from configuration.
  96. example:
  97. ['6611a662-e70f-4ba5-a397-69a3a2121129.dump',
  98. '6611a662-e70f-4ba5-a397-69a3a2121129.flash1.dump',
  99. '6611a662-e70f-4ba5-a397-69a3a2121129.flash2.dump',
  100. ]
  101. returns
  102. ['upload_file_minidump', 'flash1', 'flash2']
  103. """
  104. prefix = path.commonprefix([path.basename(x) for x in pathnames])
  105. prefix_length = len(prefix)
  106. dump_names = []
  107. for a_pathname in pathnames:
  108. base_name = path.basename(a_pathname)
  109. dump_name = base_name[prefix_length:-len(self.config.dump_suffix)]
  110. if not dump_name:
  111. dump_name = self.config.dump_field
  112. dump_names.append(dump_name)
  113. return dump_names
  114. #--------------------------------------------------------------------------
  115. def new_crashes(self):
  116. # loop over all files under the search_root that have a suffix of
  117. # ".json"
  118. for a_path, a_file_name, raw_crash_pathname in findFileGenerator(
  119. self.config.search_root,
  120. lambda x: x[2].endswith(".json")
  121. ):
  122. prefix = path.splitext(a_file_name)[0]
  123. crash_pathnames = [raw_crash_pathname]
  124. for dumpfilename in listdir(a_path):
  125. if (dumpfilename.startswith(prefix) and
  126. dumpfilename.endswith(self.config.dump_suffix)):
  127. crash_pathnames.append(
  128. path.join(a_path, dumpfilename)
  129. )
  130. # yield the pathnames of all the crash parts - normally, this
  131. # method in a crashstorage class yields just a crash_id. In this
  132. # case however, we have only pathnames to work with. So we return
  133. # this (args, kwargs) form instead
  134. yield (((prefix, crash_pathnames), ), {})
  135. #==============================================================================
# this class was relocated to a more appropriate module and given a new name.
# This import is offered for backwards compatibility.  Note that there has
# also been an internal change to the required config, with the source
# implementation moved into a namespace
  140. from socorro.external.postgresql.new_crash_source import (
  141. DBCrashStorageWrapperNewCrashSource as DBSamplingCrashSource
  142. )
  143. #==============================================================================
  144. class SubmitterApp(FetchTransformSaveWithSeparateNewCrashSourceApp):
  145. app_name = 'submitter_app'
  146. app_version = '3.1'
  147. app_description = __doc__
  148. required_config = Namespace()
  149. required_config.namespace('submitter')
  150. required_config.submitter.add_option(
  151. 'delay',
  152. doc="pause between submission queuing in milliseconds",
  153. default='0',
  154. from_string_converter=lambda x: float(x) / 1000.0
  155. )
  156. required_config.submitter.add_option(
  157. 'dry_run',
  158. doc="don't actually submit, just print product/version from raw crash",
  159. short_form='D',
  160. default=False
  161. )
  162. #--------------------------------------------------------------------------
  163. @staticmethod
  164. def get_application_defaults():
  165. return {
  166. "source.crashstorage_class": SubmitterFileSystemWalkerSource,
  167. "destination.crashstorage_class":
  168. 'socorro.collector.breakpad_submitter_utilities'
  169. '.BreakpadPOSTDestination',
  170. "number_of_submissions": "all",
  171. }
  172. #--------------------------------------------------------------------------
  173. def _action_between_each_iteration(self):
  174. if self.config.submitter.delay:
  175. time.sleep(self.config.submitter.delay)
  176. #--------------------------------------------------------------------------
  177. def _action_after_iteration_completes(self):
  178. self.config.logger.info(
  179. 'the queuing iterator is exhausted - waiting to quit'
  180. )
  181. self.task_manager.wait_for_empty_queue(
  182. 5,
  183. "waiting for the queue to drain before quitting"
  184. )
  185. time.sleep(self.config.producer_consumer.number_of_threads * 2)
  186. #--------------------------------------------------------------------------
  187. def _filter_disallowed_values(self, current_value):
  188. """in this base class there are no disallowed values coming from the
  189. iterators. Other users of these iterator may have some standards and
  190. can detect and reject them here"""
  191. return current_value is None
  192. #--------------------------------------------------------------------------
  193. def _transform(self, crash_id):
  194. """this transform function only transfers raw data from the
  195. source to the destination without changing the data."""
  196. if self.config.submitter.dry_run:
  197. print crash_id
  198. else:
  199. raw_crash = self.source.get_raw_crash(crash_id)
  200. dumps = self.source.get_raw_dumps_as_files(crash_id)
  201. self.destination.save_raw_crash_with_file_dumps(
  202. raw_crash,
  203. dumps,
  204. crash_id
  205. )
# script entry point: hand the app class to the imported main() only when
# this file is executed directly, not when it is imported
if __name__ == '__main__':
    main(SubmitterApp)