PageRenderTime 41ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 0ms

/google_appengine/google/appengine/ext/mapreduce/status.py

https://bitbucket.org/ikelkar/ece1779_project2
Python | 392 lines | 197 code | 83 blank | 112 comment | 30 complexity | 181f980a412443283d818c5245c9353b MD5 | raw file
  1. #!/usr/bin/env python
  2. #
  3. # Copyright 2007 Google Inc.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. """Status page handler for mapreduce framework."""
import calendar
import os
import time

from google.appengine.api import validation
from google.appengine.api import yaml_builder
from google.appengine.api import yaml_errors
from google.appengine.api import yaml_listener
from google.appengine.api import yaml_object
from google.appengine.ext import db
from google.appengine.ext.mapreduce import base_handler
from google.appengine.ext.mapreduce import errors
from google.appengine.ext.mapreduce import model
# File names accepted for the mapreduce configuration file. The directory
# search tries them in this order within each directory it visits.
MR_YAML_NAMES = ["mapreduce.yaml", "mapreduce.yml"]
  30. class BadStatusParameterError(Exception):
  31. """A parameter passed to a status handler was invalid."""
  32. class UserParam(validation.Validated):
  33. """A user-supplied parameter to a mapreduce job."""
  34. ATTRIBUTES = {
  35. "name": r"[a-zA-Z0-9_\.]+",
  36. "default": validation.Optional(r".*"),
  37. "value": validation.Optional(r".*"),
  38. }
  39. class MapperInfo(validation.Validated):
  40. """Configuration parameters for the mapper part of the job."""
  41. ATTRIBUTES = {
  42. "handler": r".+",
  43. "input_reader": r".+",
  44. "output_writer": validation.Optional(r".+"),
  45. "params": validation.Optional(validation.Repeated(UserParam)),
  46. "params_validator": validation.Optional(r".+"),
  47. }
  48. class MapreduceInfo(validation.Validated):
  49. """Mapreduce description in mapreduce.yaml."""
  50. ATTRIBUTES = {
  51. "name": r".+",
  52. "mapper": MapperInfo,
  53. "params": validation.Optional(validation.Repeated(UserParam)),
  54. "params_validator": validation.Optional(r".+"),
  55. }
  56. class MapReduceYaml(validation.Validated):
  57. """Root class for mapreduce.yaml.
  58. File format:
  59. mapreduce:
  60. - name: <mapreduce_name>
  61. mapper:
  62. - input_reader: google.appengine.ext.mapreduce.DatastoreInputReader
  63. - handler: path_to_my.MapperFunction
  64. - params:
  65. - name: foo
  66. default: bar
  67. - name: blah
  68. default: stuff
  69. - params_validator: path_to_my.ValidatorFunction
  70. Where
  71. mapreduce_name: The name of the mapreduce. Used for UI purposes.
  72. mapper_handler_spec: Full <module_name>.<function_name/class_name> of
  73. mapper handler. See MapreduceSpec class documentation for full handler
  74. specification.
  75. input_reader: Full <module_name>.<function_name/class_name> of the
  76. InputReader sub-class to use for the mapper job.
  77. params: A list of optional parameter names and optional default values
  78. that may be supplied or overridden by the user running the job.
  79. params_validator is full <module_name>.<function_name/class_name> of
  80. a callable to validate the mapper_params after they are input by the
  81. user running the job.
  82. """
  83. ATTRIBUTES = {
  84. "mapreduce": validation.Optional(validation.Repeated(MapreduceInfo))
  85. }
  86. @staticmethod
  87. def to_dict(mapreduce_yaml):
  88. """Converts a MapReduceYaml file into a JSON-encodable dictionary.
  89. For use in user-visible UI and internal methods for interfacing with
  90. user code (like param validation). as a list
  91. Args:
  92. mapreduce_yaml: The Pyton representation of the mapreduce.yaml document.
  93. Returns:
  94. A list of configuration dictionaries.
  95. """
  96. all_configs = []
  97. for config in mapreduce_yaml.mapreduce:
  98. out = {
  99. "name": config.name,
  100. "mapper_input_reader": config.mapper.input_reader,
  101. "mapper_handler": config.mapper.handler,
  102. }
  103. if config.mapper.params_validator:
  104. out["mapper_params_validator"] = config.mapper.params_validator
  105. if config.mapper.params:
  106. param_defaults = {}
  107. for param in config.mapper.params:
  108. param_defaults[param.name] = param.default or param.value
  109. out["mapper_params"] = param_defaults
  110. if config.params:
  111. param_defaults = {}
  112. for param in config.params:
  113. param_defaults[param.name] = param.default or param.value
  114. out["params"] = param_defaults
  115. if config.mapper.output_writer:
  116. out["mapper_output_writer"] = config.mapper.output_writer
  117. all_configs.append(out)
  118. return all_configs
  119. def find_mapreduce_yaml(status_file=__file__):
  120. """Traverse directory trees to find mapreduce.yaml file.
  121. Begins with the location of status.py and then moves on to check the working
  122. directory.
  123. Args:
  124. status_file: location of status.py, overridable for testing purposes.
  125. Returns:
  126. the path of mapreduce.yaml file or None if not found.
  127. """
  128. checked = set()
  129. yaml = _find_mapreduce_yaml(os.path.dirname(status_file), checked)
  130. if not yaml:
  131. yaml = _find_mapreduce_yaml(os.getcwd(), checked)
  132. return yaml
  133. def _find_mapreduce_yaml(start, checked):
  134. """Traverse the directory tree identified by start until a directory already
  135. in checked is encountered or the path of mapreduce.yaml is found.
  136. Checked is present both to make loop termination easy to reason about and so
  137. that the same directories do not get rechecked.
  138. Args:
  139. start: the path to start in and work upward from
  140. checked: the set of already examined directories
  141. Returns:
  142. the path of mapreduce.yaml file or None if not found.
  143. """
  144. dir = start
  145. while dir not in checked:
  146. checked.add(dir)
  147. for mr_yaml_name in MR_YAML_NAMES:
  148. yaml_path = os.path.join(dir, mr_yaml_name)
  149. if os.path.exists(yaml_path):
  150. return yaml_path
  151. dir = os.path.dirname(dir)
  152. return None
  153. def parse_mapreduce_yaml(contents):
  154. """Parses mapreduce.yaml file contents.
  155. Args:
  156. contents: mapreduce.yaml file contents.
  157. Returns:
  158. MapReduceYaml object with all the data from original file.
  159. Raises:
  160. errors.BadYamlError: when contents is not a valid mapreduce.yaml file.
  161. """
  162. try:
  163. builder = yaml_object.ObjectBuilder(MapReduceYaml)
  164. handler = yaml_builder.BuilderHandler(builder)
  165. listener = yaml_listener.EventListener(handler)
  166. listener.Parse(contents)
  167. mr_info = handler.GetResults()
  168. except (ValueError, yaml_errors.EventError), e:
  169. raise errors.BadYamlError(e)
  170. if len(mr_info) < 1:
  171. raise errors.BadYamlError("No configs found in mapreduce.yaml")
  172. if len(mr_info) > 1:
  173. raise errors.MultipleDocumentsInMrYaml("Found %d YAML documents" %
  174. len(mr_info))
  175. jobs = mr_info[0]
  176. job_names = set(j.name for j in jobs.mapreduce)
  177. if len(jobs.mapreduce) != len(job_names):
  178. raise errors.BadYamlError(
  179. "Overlapping mapreduce names; names must be unique")
  180. return jobs
  181. def get_mapreduce_yaml(parse=parse_mapreduce_yaml):
  182. """Locates mapreduce.yaml, loads and parses its info.
  183. Args:
  184. parse: Used for testing.
  185. Returns:
  186. MapReduceYaml object.
  187. Raises:
  188. errors.BadYamlError: when contents is not a valid mapreduce.yaml file or the
  189. file is missing.
  190. """
  191. mr_yaml_path = find_mapreduce_yaml()
  192. if not mr_yaml_path:
  193. raise errors.MissingYamlError()
  194. mr_yaml_file = open(mr_yaml_path)
  195. try:
  196. return parse(mr_yaml_file.read())
  197. finally:
  198. mr_yaml_file.close()
  199. class ResourceHandler(base_handler.BaseHandler):
  200. """Handler for static resources."""
  201. _RESOURCE_MAP = {
  202. "status": ("overview.html", "text/html"),
  203. "detail": ("detail.html", "text/html"),
  204. "base.css": ("base.css", "text/css"),
  205. "jquery.js": ("jquery-1.6.1.min.js", "text/javascript"),
  206. "jquery-json.js": ("jquery.json-2.2.min.js", "text/javascript"),
  207. "status.js": ("status.js", "text/javascript"),
  208. }
  209. def get(self, relative):
  210. if relative not in self._RESOURCE_MAP:
  211. self.response.set_status(404)
  212. self.response.out.write("Resource not found.")
  213. return
  214. real_path, content_type = self._RESOURCE_MAP[relative]
  215. path = os.path.join(os.path.dirname(__file__), "static", real_path)
  216. self.response.headers["Cache-Control"] = "public; max-age=300"
  217. self.response.headers["Content-Type"] = content_type
  218. self.response.out.write(open(path).read())
  219. class ListConfigsHandler(base_handler.GetJsonHandler):
  220. """Lists mapreduce configs as JSON for users to start jobs."""
  221. def handle(self):
  222. self.json_response["configs"] = MapReduceYaml.to_dict(get_mapreduce_yaml())
  223. class ListJobsHandler(base_handler.GetJsonHandler):
  224. """Lists running and completed mapreduce jobs for an overview as JSON."""
  225. def handle(self):
  226. cursor = self.request.get("cursor")
  227. count = int(self.request.get("count", "50"))
  228. query = model.MapreduceState.all()
  229. if cursor:
  230. query.filter("__key__ >=", db.Key(cursor))
  231. query.order("__key__")
  232. jobs_list = query.fetch(count + 1)
  233. if len(jobs_list) == (count + 1):
  234. self.json_response["cursor"] = str(jobs_list[-1].key())
  235. jobs_list = jobs_list[:-1]
  236. all_jobs = []
  237. for job in jobs_list:
  238. out = {
  239. "name": job.mapreduce_spec.name,
  240. "mapreduce_id": job.mapreduce_spec.mapreduce_id,
  241. "active": job.active,
  242. "start_timestamp_ms":
  243. int(time.mktime(job.start_time.utctimetuple()) * 1000),
  244. "updated_timestamp_ms":
  245. int(time.mktime(job.last_poll_time.utctimetuple()) * 1000),
  246. "chart_url": job.sparkline_url,
  247. "chart_width": job.chart_width,
  248. "active_shards": job.active_shards,
  249. "shards": job.mapreduce_spec.mapper.shard_count,
  250. }
  251. if job.result_status:
  252. out["result_status"] = job.result_status
  253. all_jobs.append(out)
  254. self.json_response["jobs"] = all_jobs
  255. class GetJobDetailHandler(base_handler.GetJsonHandler):
  256. """Retrieves the details of a mapreduce job as JSON."""
  257. def handle(self):
  258. mapreduce_id = self.request.get("mapreduce_id")
  259. if not mapreduce_id:
  260. raise BadStatusParameterError("'mapreduce_id' was invalid")
  261. job = model.MapreduceState.get_by_key_name(mapreduce_id)
  262. if job is None:
  263. raise KeyError("Could not find job with ID %r" % mapreduce_id)
  264. self.json_response.update(job.mapreduce_spec.to_json())
  265. self.json_response.update(job.counters_map.to_json())
  266. self.json_response.update({
  267. "active": job.active,
  268. "start_timestamp_ms":
  269. int(time.mktime(job.start_time.utctimetuple()) * 1000),
  270. "updated_timestamp_ms":
  271. int(time.mktime(job.last_poll_time.utctimetuple()) * 1000),
  272. "chart_url": job.chart_url,
  273. })
  274. self.json_response["result_status"] = job.result_status
  275. shards_list = model.ShardState.find_by_mapreduce_state(job)
  276. all_shards = []
  277. shards_list.sort(key=lambda x: x.shard_number)
  278. for shard in shards_list:
  279. out = {
  280. "active": shard.active,
  281. "result_status": shard.result_status,
  282. "shard_number": shard.shard_number,
  283. "shard_id": shard.shard_id,
  284. "updated_timestamp_ms":
  285. int(time.mktime(shard.update_time.utctimetuple()) * 1000),
  286. "shard_description": shard.shard_description,
  287. "last_work_item": shard.last_work_item,
  288. }
  289. out.update(shard.counters_map.to_json())
  290. all_shards.append(out)
  291. self.json_response["shards"] = all_shards