PageRenderTime 55ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/flows/general/transfer.py

https://code.google.com/
Python | 345 lines | 175 code | 65 blank | 105 comment | 29 complexity | 7c62fefb9b95e5b505b0df483c9c105e MD5 | raw file
Possible License(s): Apache-2.0
  1. #!/usr/bin/env python
  2. # Copyright 2011 Google Inc.
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """These flows are designed for high performance transfers."""
  15. import hashlib
  16. import stat
  17. import time
  18. import zlib
  19. import logging
  20. from grr.lib import aff4
  21. from grr.lib import flow
  22. from grr.lib import utils
  23. from grr.proto import jobs_pb2
  24. class GetFile(flow.GRRFlow):
  25. """An efficient file transfer mechanism.
  26. Returns to parent flow:
  27. A jobs_pb2.Path.
  28. """
  29. category = "/Filesystem/"
  30. out_protobuf = jobs_pb2.StatResponse
  31. # Read in 512kb chunks
  32. _CHUNK_SIZE = 512 * 1024
  33. # We have a maximum of this many chunk reads outstanding (about 10mb)
  34. _WINDOW_SIZE = 200
  35. current_chunk_number = 0
  36. max_chunk_number = 2
  37. def __init__(self, path="/",
  38. pathtype=utils.ProtoEnum(jobs_pb2.Path, "PathType", "OS"),
  39. pathspec=None, **kwargs):
  40. """Constructor.
  41. This flow uses chunking and hashes to de-duplicate data and send it
  42. efficiently.
  43. Args:
  44. path: The directory path to list.
  45. pathtype: Identifies requested path type. Enum from Path protobuf.
  46. pathspec: This flow also accepts all the information in one pathspec.
  47. which is preferred over the path and pathtype definition
  48. """
  49. self.urn = None
  50. if pathspec:
  51. self.pathspec = utils.Pathspec(pathspec)
  52. else:
  53. self.pathspec = utils.Pathspec(path=path, pathtype=int(pathtype))
  54. flow.GRRFlow.__init__(self, **kwargs)
  55. @flow.StateHandler(next_state=["Stat", "ReadBuffer"])
  56. def Start(self):
  57. """Get information about the file from the client."""
  58. self.CallClient("StatFile", pathspec=self.pathspec.ToProto(),
  59. next_state="Stat")
  60. # Read the first buffer
  61. self.FetchWindow(self.max_chunk_number)
  62. @flow.StateHandler()
  63. def Stat(self, responses):
  64. """Fix up the pathspec of the file."""
  65. if responses.success:
  66. self.stat = responses.First()
  67. self.pathspec = utils.Pathspec(self.stat.pathspec)
  68. else:
  69. raise IOError("Error: %s" % responses.status)
  70. def FetchWindow(self, number_of_chunks_to_readahead):
  71. """Read ahead a number of buffers to fill the window."""
  72. for _ in range(number_of_chunks_to_readahead):
  73. # Do not read past the end of file
  74. if self.current_chunk_number > self.max_chunk_number:
  75. return
  76. self.CallClient("TransferBuffer", pathspec=self.pathspec.ToProto(),
  77. offset=self.current_chunk_number * self._CHUNK_SIZE,
  78. length=self._CHUNK_SIZE, next_state="ReadBuffer")
  79. self.current_chunk_number += 1
  80. @flow.StateHandler(next_state="ReadBuffer")
  81. def ReadBuffer(self, responses):
  82. """Read the buffer and write to the file."""
  83. # Did it work?
  84. if responses.success:
  85. response = responses.First()
  86. if not response:
  87. raise IOError("Missing hash for offset %s missing" % response.offset)
  88. if response.offset == 0:
  89. # Force creation of the new AFF4 object (Note that this is pinned on the
  90. # client id - i.e. the client can not change aff4 objects outside its
  91. # tree).
  92. self.urn = aff4.AFF4Object.VFSGRRClient.PathspecToURN(
  93. self.pathspec, self.client_id)
  94. self.stat.aff4path = utils.SmartUnicode(self.urn)
  95. # Create a new Hash image for the data. Note that this object is pickled
  96. # with this flow between states.
  97. self.fd = aff4.FACTORY.Create(self.urn, "HashImage", token=self.token)
  98. # The chunksize must be set to be the same as the transfer chunk size.
  99. self.fd.Set(self.fd.Schema.CHUNKSIZE(self._CHUNK_SIZE))
  100. self.fd.Set(self.fd.Schema.STAT(self.stat))
  101. self.max_chunk_number = self.stat.st_size / self._CHUNK_SIZE
  102. # Fill up the window with requests
  103. self.FetchWindow(self._WINDOW_SIZE)
  104. # Write the hash to the index. Note that response.data is the hash of the
  105. # block (32 bytes) and response.length is the length of the block.
  106. self.fd.AddBlob(response.data, response.length)
  107. self.Log("Received blob hash %s", response.data.encode("hex"))
  108. self.Status("Received %s bytes", self.fd.size)
  109. # Add one more chunk to the window.
  110. self.FetchWindow(1)
  111. def End(self):
  112. """Finalize reading the file."""
  113. if self.urn is None:
  114. self.Notify("ViewObject", self.client_id, "File failed to be transferred")
  115. else:
  116. self.Notify("ViewObject", self.urn, "File transferred successfully")
  117. self.Log("Finished reading %s", self.urn)
  118. self.Log("Flow Completed in %s seconds",
  119. time.time() - self.flow_pb.create_time/1e6)
  120. # Notify any parent flows the file is ready to be used now.
  121. self.SendReply(self.fd.Get(self.fd.Schema.STAT))
  122. self.fd.Close(sync=True)
  123. class TransferStore(flow.WellKnownFlow):
  124. """Store a buffer into a determined location."""
  125. well_known_session_id = "W:TransferStore"
  126. def ProcessMessage(self, message):
  127. """Write the blob into the AFF4 blob storage area."""
  128. # Check that the message is authenticated
  129. if message.auth_state != jobs_pb2.GrrMessage.AUTHENTICATED:
  130. logging.error("TransferStore request from %s is not authenticated.",
  131. message.source)
  132. return
  133. read_buffer = jobs_pb2.DataBlob()
  134. read_buffer.ParseFromString(message.args)
  135. # Only store non empty buffers
  136. if read_buffer.data:
  137. data = read_buffer.data
  138. if read_buffer.compression == jobs_pb2.DataBlob.ZCOMPRESSION:
  139. cdata = data
  140. data = zlib.decompress(cdata)
  141. elif read_buffer.compression == jobs_pb2.DataBlob.UNCOMPRESSED:
  142. cdata = zlib.compress(data)
  143. else:
  144. raise RuntimeError("Unsupported compression")
  145. # The hash is done on the uncompressed data
  146. digest = hashlib.sha256(data).digest()
  147. urn = aff4.ROOT_URN.Add("blobs").Add(digest.encode("hex"))
  148. # Write the blob to the data store. We cheat here and just store the
  149. # compressed data to avoid recompressing it.
  150. fd = aff4.FACTORY.Create(urn, "AFF4MemoryStream", mode="w",
  151. token=self.token)
  152. fd.Set(fd.Schema.CONTENT(cdata))
  153. fd.Set(fd.Schema.SIZE(len(data)))
  154. fd.Close(sync=False)
  155. logging.info("Got blob %s (length %s)", digest.encode("hex"),
  156. len(cdata))
  157. class FileDownloader(flow.GRRFlow):
  158. """Handle the automated collection of multiple files.
  159. This class contains the logic to automatically collect and store a set
  160. of files and directories.
  161. Classes that want to implement this functionality for a specific
  162. set of files should inherit from it and override __init__ and set
  163. self.findspecs to something appropriate.
  164. Alternatively they can override GetFindSpecs for simple cases.
  165. Returns to parent flow:
  166. A StatResponse protobuf for each downloaded file.
  167. """
  168. out_protobuf = jobs_pb2.StatResponse
  169. def __init__(self, findspecs=None, **kwargs):
  170. """Determine the usable findspecs.
  171. Args:
  172. findspecs: A list of jobs_pb2.Find protos. If None, self.GetFindSpecs
  173. will be called to get the specs.
  174. """
  175. flow.GRRFlow.__init__(self, **kwargs)
  176. self.findspecs = findspecs
  177. @flow.StateHandler(next_state=["DownloadFiles"])
  178. def Start(self):
  179. """Queue flows for all valid find specs."""
  180. if self.findspecs is None:
  181. # Call GetFindSpecs, should be overridden by inheriting classes.
  182. self.findspecs = list(self.GetFindSpecs())
  183. if not self.findspecs:
  184. self.Log("No usable specs found.")
  185. self.Terminate()
  186. for findspec in self.findspecs:
  187. self.CallFlow("FindFiles", next_state="DownloadFiles",
  188. findspec=findspec, output=None)
  189. @flow.StateHandler(jobs_pb2.StatResponse, next_state="HandleDownloadedFiles")
  190. def DownloadFiles(self, responses):
  191. """For each file found in the resulting collection, download it."""
  192. if responses.success:
  193. count = 0
  194. for response in responses:
  195. # Only download regular files.
  196. if stat.S_ISREG(response.st_mode):
  197. count += 1
  198. self.CallFlow("GetFile",
  199. next_state="HandleDownloadedFiles",
  200. pathspec=response.pathspec)
  201. self.Log("Scheduling download of %d files", count)
  202. else:
  203. self.Log("Find failed %s", responses.status)
  204. @flow.StateHandler(jobs_pb2.StatResponse)
  205. def HandleDownloadedFiles(self, responses):
  206. """Handle the Stats that come back from the GetFile calls."""
  207. if responses.success:
  208. # GetFile returns a list of StatResponse protos.
  209. for response_stat in responses:
  210. self.Log("Downloaded %s", response_stat.pathspec)
  211. self.SendReply(response_stat)
  212. else:
  213. self.Log("Download of file %s failed %s",
  214. responses.GetRequestArgPb().pathspec, responses.status)
  215. def GetFindSpecs(self):
  216. """Returns iterable of jobs_pb2.Find objects. Should be overridden."""
  217. return []
  218. class FileCollector(flow.GRRFlow):
  219. """Flow to create a collection from downloaded files.
  220. This flow calls the FileDownloader and creates a collection for the results.
  221. Returns to the parent flow:
  222. A StatResponse protobuf describing the output collection.
  223. """
  224. out_protobuf = jobs_pb2.StatResponse
  225. def __init__(self, findspecs=None,
  226. output="analysis/collect/{u}-{t}", **kwargs):
  227. """Download all files matching the findspecs and generate a collection.
  228. Args:
  229. findspecs: A list of jobs_pb2.Find protos. If None, self.GetFindSpecs
  230. will be called to get the specs.
  231. output: If set, a URN to an AFF4Collection to add each result to.
  232. This will create the collection if it does not exist.
  233. """
  234. flow.GRRFlow.__init__(self, **kwargs)
  235. # Expand special escapes.
  236. output = output.format(t=time.time(), u=self.user)
  237. self.output = aff4.ROOT_URN.Add(self.client_id).Add(output)
  238. self.collection_list = None
  239. self.fd = aff4.FACTORY.Create(self.output, "AFF4Collection", mode="rw",
  240. token=self.token)
  241. self.Log("Created output collection %s", self.output)
  242. self.fd.Set(self.fd.Schema.DESCRIPTION("CollectFiles {0}".format(
  243. self.__class__.__name__)))
  244. # Append to the collection if needed.
  245. self.collection_list = self.fd.Get(self.fd.Schema.COLLECTION)
  246. self.findspecs = findspecs
  247. @flow.StateHandler(next_state="WriteCollection")
  248. def Start(self):
  249. if self.findspecs:
  250. # Just call the FileDownloader with these findspecs
  251. self.CallFlow("FileDownloader", findspecs=self.findspecs,
  252. next_state="WriteCollection")
  253. else:
  254. self.Log("No findspecs to run.")
  255. @flow.StateHandler()
  256. def WriteCollection(self, responses):
  257. """Adds the results to the collection."""
  258. for response_stat in responses:
  259. self.collection_list.Append(response_stat)
  260. self.fd.Set(self.fd.Schema.COLLECTION, self.collection_list)
  261. self.fd.Close(True)
  262. # Tell our caller about the new collection.
  263. self.SendReply(jobs_pb2.StatResponse(
  264. aff4path=utils.SmartUnicode(self.fd.urn)))
  265. @flow.StateHandler()
  266. def End(self):
  267. # Notify our creator.
  268. num_files = len(self.collection_list)
  269. self.Notify("ViewObject", self.output,
  270. "Completed download of {0:d} files.".format(num_files))