PageRenderTime 1845ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/google/appengine/ext/bulkload/__init__.py

https://github.com/atduskgreg/portable-google-app-engine-sdk
Python | 396 lines | 352 code | 8 blank | 36 comment | 9 complexity | 92e880ce42182275e876d979f49bc93a MD5 | raw file
Possible License(s): BSD-3-Clause
  1. #!/usr/bin/env python
  2. #
  3. # Copyright 2007 Google Inc.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. """A mix-in handler for bulk loading data into an application.
  18. For complete documentation, see the Tools and Libraries section of the
  19. documentation.
  20. To use this in your app, first write a script, e.g. bulkload.py, that
  21. instantiates a Loader for each entity kind you want to import and calls
  22. bulkload.main(instance). For example:
  23. person = bulkload.Loader(
  24. 'Person',
  25. [('name', str),
  26. ('email', datastore_types.Email),
  27. ('birthdate', lambda x: datetime.datetime.fromtimestamp(float(x))),
  28. ])
  29. if __name__ == '__main__':
  30. bulkload.main(person)
  31. See the Loader class for more information. Then, add a handler for it in your
  32. app.yaml, e.g.:
  33. urlmap:
  34. - regex: /load
  35. handler:
  36. type: 1
  37. path: bulkload.py
  38. requires_login: true
  39. admin_only: true
  40. Finally, deploy your app and run bulkload_client.py. For example, to load the
  41. file people.csv into a dev_appserver running on your local machine:
  42. ./bulkload_client.py --filename people.csv --kind Person --cookie ... \
  43. --url http://localhost:8080/load
  44. The kind parameter is used to look up the Loader instance that will be used.
  45. The bulkload handler should usually be admin_only, so that non-admins can't use
  46. it to modify your app's data. The bulkload client uses the cookie
  47. parameter to piggyback its HTTP requests on your login session. A GET request
  48. to the URL specified for your bulkload script will give you a cookie parameter
  49. you can use (/load in the example above). If your bulkload handler is not
  50. admin_only, you may omit the cookie parameter.
  51. If you want to do extra processing before the entities are stored, you can
  52. subclass Loader and override HandleEntity. HandleEntity is called once with
  53. each entity that is imported from the CSV data. You can return one or more
  54. entities from HandleEntity to be stored in its place, or None if nothing
  55. should be stored.
  56. For example, this loads calendar events and stores them as
  57. datastore_entities.Event entities. It also populates their author field with a
  58. reference to the corresponding datastore_entities.Contact entity. If no Contact
  59. entity exists yet for the given author, it creates one and stores it first.
  60. class EventLoader(bulkload.Loader):
  61. def __init__(self):
  62. bulkload.Loader.__init__(self, 'Event',
  63. [('title', str),
  64. ('creator', str),
  65. ('where', str),
  66. ('startTime', lambda x:
  67. datetime.datetime.fromtimestamp(float(x))),
  68. ])
  69. def HandleEntity(self, entity):
  70. event = datastore_entities.Event(entity.title)
  71. event.update(entity)
  72. creator = event['creator']
  73. if creator:
  74. contact = datastore.Query('Contact', {'title': creator}).Get(1)
  75. if not contact:
  76. contact = [datastore_entities.Contact(creator)]
  77. datastore.Put(contact[0])
  78. event['author'] = contact[0].key()
  79. return event
  80. if __name__ == '__main__':
  81. bulkload.main(EventLoader())
  82. """
  83. import Cookie
  84. import StringIO
  85. import csv
  86. import httplib
  87. import os
  88. import sys
  89. import traceback
  90. import types
  91. import google
  92. import wsgiref.handlers
  93. from google.appengine.api import datastore
  94. from google.appengine.api import datastore_types
  95. from google.appengine.ext import webapp
  96. from google.appengine.ext.bulkload import constants
  97. def Validate(value, type):
  98. """ Checks that value is non-empty and of the right type.
  99. Raises ValueError if value is None or empty, TypeError if it's not the given
  100. type.
  101. Args:
  102. value: any value
  103. type: a type or tuple of types
  104. """
  105. if not value:
  106. raise ValueError('Value should not be empty; received %s.' % value)
  107. elif not isinstance(value, type):
  108. raise TypeError('Expected a %s, but received %s (a %s).' %
  109. (type, value, value.__class__))
  110. class Loader(object):
  111. """ A base class for creating datastore entities from CSV input data.
  112. To add a handler for bulk loading a new entity kind into your datastore,
  113. write a subclass of this class that calls Loader.__init__ from your
  114. class's __init__.
  115. If you need to run extra code to convert entities from CSV, create new
  116. properties, or otherwise modify the entities before they're inserted,
  117. override AddEntity.
  118. """
  119. __loaders = {}
  120. __kind = None
  121. __properties = None
  122. def __init__(self, kind, properties):
  123. """ Constructor.
  124. Populates this Loader's kind and properties map. Also registers it with
  125. the bulk loader, so that all you need to do is instantiate your Loader,
  126. and the bulkload handler will automatically use it.
  127. Args:
  128. kind: a string containing the entity kind that this loader handles
  129. properties: list of (name, converter) tuples.
  130. This is used to automatically convert the CSV columns into properties.
  131. The converter should be a function that takes one argument, a string
  132. value from the CSV file, and returns a correctly typed property value
  133. that should be inserted. The tuples in this list should match the
  134. columns in your CSV file, in order.
  135. For example:
  136. [('name', str),
  137. ('id_number', int),
  138. ('email', datastore_types.Email),
  139. ('user', users.User),
  140. ('birthdate', lambda x: datetime.datetime.fromtimestamp(float(x))),
  141. ('description', datastore_types.Text),
  142. ]
  143. """
  144. Validate(kind, basestring)
  145. self.__kind = kind
  146. Validate(properties, list)
  147. for name, fn in properties:
  148. Validate(name, basestring)
  149. assert callable(fn), (
  150. 'Conversion function %s for property %s is not callable.' % (fn, name))
  151. self.__properties = properties
  152. Loader.__loaders[kind] = self
  153. def kind(self):
  154. """ Return the entity kind that this Loader handes.
  155. """
  156. return self.__kind
  157. def CreateEntity(self, values):
  158. """ Creates an entity from a list of property values.
  159. Args:
  160. values: list of str
  161. Returns:
  162. list of datastore.Entity
  163. The returned entities are populated with the property values from the
  164. argument, converted to native types using the properties map given in
  165. the constructor, and passed through HandleEntity. They're ready to be
  166. inserted.
  167. Raises an AssertionError if the number of values doesn't match the number
  168. of properties in the properties map.
  169. """
  170. Validate(values, list)
  171. assert len(values) == len(self.__properties), (
  172. 'Expected %d CSV columns, found %d.' %
  173. (len(self.__properties), len(values)))
  174. entity = datastore.Entity(self.__kind)
  175. for (name, converter), val in zip(self.__properties, values):
  176. entity[name] = converter(val)
  177. entities = self.HandleEntity(entity)
  178. if entities is not None:
  179. if not isinstance(entities, list):
  180. entities = [entities]
  181. for entity in entities:
  182. if not isinstance(entity, datastore.Entity):
  183. raise TypeError('Expected a datastore.Entity, received %s (a %s).' %
  184. (entity, entity.__class__))
  185. return entities
  186. def HandleEntity(self, entity):
  187. """ Subclasses can override this to add custom entity conversion code.
  188. This is called for each entity, after its properties are populated from
  189. CSV but before it is stored. Subclasses can override this to add custom
  190. entity handling code.
  191. The entity to be inserted should be returned. If multiple entities should
  192. be inserted, return a list of entities. If no entities should be inserted,
  193. return None or [].
  194. Args:
  195. entity: datastore.Entity
  196. Returns:
  197. datastore.Entity or list of datastore.Entity
  198. """
  199. return entity
  200. @staticmethod
  201. def RegisteredLoaders():
  202. """ Returns a list of the Loader instances that have been created.
  203. """
  204. return dict(Loader.__loaders)
  205. class BulkLoad(webapp.RequestHandler):
  206. """ A handler for bulk load requests.
  207. """
  208. def get(self):
  209. """ Handle a GET. Just show an info page.
  210. """
  211. page = self.InfoPage(self.request.uri)
  212. self.response.out.write(page)
  213. def post(self):
  214. """ Handle a POST. Reads CSV data, converts to entities, and stores them.
  215. """
  216. self.response.headers['Content-Type'] = 'text/plain'
  217. response, output = self.Load(self.request.get(constants.KIND_PARAM),
  218. self.request.get(constants.CSV_PARAM))
  219. self.response.set_status(response)
  220. self.response.out.write(output)
  221. def InfoPage(self, uri):
  222. """ Renders an information page with the POST endpoint and cookie flag.
  223. Args:
  224. uri: a string containing the request URI
  225. Returns:
  226. A string with the contents of the info page to be displayed
  227. """
  228. page = """
  229. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  230. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  231. <html><head>
  232. <title>Bulk Loader</title>
  233. </head><body>"""
  234. page += ('The bulk load endpoint is: <a href="%s">%s</a><br />\n' %
  235. (uri, uri))
  236. cookies = os.environ.get('HTTP_COOKIE', None)
  237. if cookies:
  238. cookie = Cookie.BaseCookie(cookies)
  239. for param in ['ACSID', 'dev_appserver_login']:
  240. value = cookie.get(param)
  241. if value:
  242. page += ("Pass this flag to the client: --cookie='%s=%s'\n" %
  243. (param, value.value))
  244. break
  245. else:
  246. page += 'No cookie found!\n'
  247. page += '</body></html>'
  248. return page
  249. def Load(self, kind, data):
  250. """ Parses CSV data, uses a Loader to convert to entities, and stores them.
  251. On error, fails fast. Returns a "bad request" HTTP response code and
  252. includes the traceback in the output.
  253. Args:
  254. kind: a string containing the entity kind that this loader handles
  255. data: a string containing the CSV data to load
  256. Returns:
  257. tuple (response code, output) where:
  258. response code: integer HTTP response code to return
  259. output: string containing the HTTP response body
  260. """
  261. Validate(kind, basestring)
  262. Validate(data, basestring)
  263. output = []
  264. try:
  265. loader = Loader.RegisteredLoaders()[kind]
  266. except KeyError:
  267. output.append('Error: no Loader defined for kind %s.' % kind)
  268. return (httplib.BAD_REQUEST, ''.join(output))
  269. buffer = StringIO.StringIO(data)
  270. reader = csv.reader(buffer, skipinitialspace=True)
  271. entities = []
  272. line_num = 1
  273. for columns in reader:
  274. if columns:
  275. try:
  276. output.append('\nLoading from line %d...' % line_num)
  277. entities.extend(loader.CreateEntity(columns))
  278. output.append('done.')
  279. except:
  280. exc_info = sys.exc_info()
  281. stacktrace = traceback.format_exception(*exc_info)
  282. output.append('error:\n%s' % stacktrace)
  283. return (httplib.BAD_REQUEST, ''.join(output))
  284. line_num += 1
  285. for entity in entities:
  286. datastore.Put(entity)
  287. return (httplib.OK, ''.join(output))
  288. def main(*loaders):
  289. """Starts bulk upload.
  290. Raises TypeError if not, at least one Loader instance is given.
  291. Args:
  292. loaders: One or more Loader instance.
  293. """
  294. if not loaders:
  295. raise TypeError('Expected at least one argument.')
  296. for loader in loaders:
  297. if not isinstance(loader, Loader):
  298. raise TypeError('Expected a Loader instance; received %r' % loader)
  299. application = webapp.WSGIApplication([('.*', BulkLoad)])
  300. wsgiref.handlers.CGIHandler().run(application)
  301. if __name__ == '__main__':
  302. main()