PageRenderTime 63ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/ipf/publish.py

https://bitbucket.org/wwsmith/ipf
Python | 350 lines | 300 code | 17 blank | 33 comment | 29 complexity | 64b1b1b0018273ace6cc42782f8e794b MD5 | raw file
  1. ###############################################################################
  2. # Copyright 2011-2015 The University of Texas at Austin #
  3. # #
  4. # Licensed under the Apache License, Version 2.0 (the "License"); #
  5. # you may not use this file except in compliance with the License. #
  6. # You may obtain a copy of the License at #
  7. # #
  8. # http://www.apache.org/licenses/LICENSE-2.0 #
  9. # #
  10. # Unless required by applicable law or agreed to in writing, software #
  11. # distributed under the License is distributed on an "AS IS" BASIS, #
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
  13. # See the License for the specific language governing permissions and #
  14. # limitations under the License. #
  15. ###############################################################################
  16. import httplib
  17. import os
  18. import random
  19. import ssl
  20. import sys
  21. import threading
  22. import time
  23. from Queue import Empty
  24. from ipf.error import NoMoreInputsError, StepError
  25. from ipf.paths import IPF_ETC_PATH, IPF_VAR_PATH
  26. from ipf.step import PublishStep # won't need in a bit
  27. from ipf.step import TriggerStep
  28. import amqp
  29. #######################################################################################################################
  30. class FileStep(PublishStep):
  31. def __init__(self):
  32. PublishStep.__init__(self)
  33. self.description = "publishes documents by writing them to a file"
  34. self.time_out = 5
  35. self._acceptParameter("path",
  36. "Path to the file to write. If the path is relative, it is relative to IPF_VAR_PATH",
  37. True)
  38. self._acceptParameter("append",
  39. "Whether to append to the file or to overwrite it (default is overwrite).",
  40. False)
  41. def _publish(self, representation):
  42. if self.params.get("append",False):
  43. self.info("appending %s",representation)
  44. f = open(self._getPath(),"a")
  45. f.write(representation.get())
  46. f.close()
  47. else:
  48. self.info("writing %s",representation)
  49. f = open(self._getPath()+".new","w")
  50. f.write(representation.get())
  51. f.close()
  52. os.rename(self._getPath()+".new",self._getPath())
  53. def _getPath(self):
  54. try:
  55. path = self.params["path"]
  56. except KeyError:
  57. raise StepError("path parameter not specified")
  58. if os.path.isabs(path):
  59. return path
  60. return os.path.join(IPF_VAR_PATH,path)
  61. #######################################################################################################################
  62. # There is a hang problem that comes up now and then, particularly with long-lived connections:
  63. #
  64. # * ssl connection to server
  65. # * the connection is lost to the server
  66. # * e.g. network outage
  67. # * or for testing, suspending the virtual machine running the RabbitMQ service
  68. # What happens is:
  69. # * the basic_publish returns, but no message is sent
  70. # * amqp.Connection.close() hangs
  71. # * heartbeats aren't being sent, so that times out
  72. #
  73. # Approach is to:
  74. # * use heartbeats to detect the connection is down
  75. # * call close() in a separate thread
  76. #
  77. # publisher confirms don't help since the wait() for the confirm could just hang
  78. class AmqpStep(PublishStep):
  79. def __init__(self):
  80. PublishStep.__init__(self)
  81. self.description = "publishes documents via AMQP"
  82. self.time_out = 5
  83. self._acceptParameter("services","A list of services to try to connect to. Each item is host[:port]. If no port is specified, port 5672 will be used for TCP connections and port 5671 will be used for SSL connections.",True)
  84. self._acceptParameter("username","the user to authenticate as",False)
  85. self._acceptParameter("password","the password to authenticate with",False)
  86. self._acceptParameter("ssl_options","A dictionary containing the SSL options to use to connect. See the Python ssl.wrap_socket function for keys and values. Any relative path names are relative to a path in $IPF_WORKFLOW_PATH",False)
  87. self._acceptParameter("vhost","the AMQP virtual host to connect to",False)
  88. self._acceptParameter("exchange","the AMQP exchange to publish to",False)
  89. self.services = []
  90. self.username = None
  91. self.password = None
  92. self.ssl_options = None
  93. self.vhost = None
  94. self.exchange = None
  95. self.cur_service = None
  96. self.connection = None
  97. self.channel = None
  98. def run(self):
  99. try:
  100. self.services = self.params["services"]
  101. except KeyError:
  102. raise StepError("services parameter not specified")
  103. try:
  104. self.username = self.params["username"].encode("utf-8")
  105. except KeyError:
  106. self.username = "guest"
  107. try:
  108. self.password = self.params["password"].encode("utf-8")
  109. except KeyError:
  110. self.password = "guest"
  111. if "ssl_options" in self.params:
  112. self.ssl_options = {}
  113. for (key,value) in self.params["ssl_options"].iteritems():
  114. self.ssl_options[key.encode("utf-8")] = value.encode("utf-8")
  115. try:
  116. if not os.path.isabs(self.ssl_options["keyfile"]):
  117. self.ssl_options["keyfile"] = os.path.join(IPF_ETC_PATH,self.ssl_options["keyfile"])
  118. except KeyError:
  119. pass
  120. try:
  121. if not os.path.isabs(self.ssl_options["certfile"]):
  122. self.ssl_options["certfile"] = os.path.join(IPF_ETC_PATH,self.ssl_options["certfile"])
  123. except KeyError:
  124. pass
  125. try:
  126. if not os.path.isabs(self.ssl_options["ca_certs"]):
  127. self.ssl_options["ca_certs"] = os.path.join(IPF_ETC_PATH,self.ssl_options["ca_certs"])
  128. except KeyError:
  129. pass
  130. if "ca_certs" in self.ssl_options and "cert_reqs" not in self.ssl_options:
  131. self.ssl_options["cert_reqs"] = ssl.CERT_REQUIRED
  132. try:
  133. self.vhost = self.params["vhost"].encode("utf-8")
  134. except KeyError:
  135. self.vhost = "/"
  136. try:
  137. self.exchange = self.params["exchange"].encode("utf-8")
  138. except KeyError:
  139. self.exchange = ""
  140. # don't use PublishStep.run since we need to handle AMQP heartbeats
  141. while True:
  142. try:
  143. data = self.input_queue.get(True,5)
  144. if data == None:
  145. break
  146. for rep_class in self.publish:
  147. if rep_class.data_cls != data.__class__:
  148. continue
  149. rep = rep_class(data)
  150. self._publish(rep)
  151. break
  152. except Empty:
  153. pass
  154. if self.connection is None:
  155. continue
  156. # quick wait with no allowed_mthods to get heartbeats from the server
  157. # hack since amqp.Connection.wait() doesn't have a timeout argument
  158. try:
  159. self.connection._wait_multiple({self.channel.channel_id:self.channel},[],1)
  160. except:
  161. # timeouts are expected
  162. pass
  163. try:
  164. self.connection.heartbeat_tick()
  165. except amqp.ConnectionForced:
  166. self.warning("closing connection - missed too many heartbeats")
  167. self._close()
  168. self._close()
  169. def _publish(self, representation):
  170. self.info("publishing %s",representation)
  171. self.debug(" with routing key '%s' to exchange '%s'",representation.data.id.encode("utf-8"),self.exchange)
  172. try:
  173. self._publishOnce(representation)
  174. except Exception, e:
  175. self.info("first publish failed: %s",e)
  176. try:
  177. self._publishOnce(representation)
  178. except Exception, e:
  179. self.error("publishing failed twice - discarding data: %s",e)
  180. def _publishOnce(self, representation):
  181. try:
  182. self._connectIfNecessary()
  183. except StepError:
  184. raise StepError("not connected to any service, will not publish %s" % representation.__class__)
  185. try:
  186. self.channel.basic_publish(amqp.Message(body=representation.get()),
  187. self.exchange,
  188. representation.data.id.encode("utf-8"))
  189. except Exception, e:
  190. self._close()
  191. raise StepError("failed to publish %s: %s" % (representation.__class__,e))
  192. def _connectIfNecessary(self):
  193. if self.channel is not None:
  194. return
  195. for i in range(0,len(self.services)):
  196. service = self._selectService()
  197. try:
  198. self._connect(service)
  199. return
  200. except Exception, e:
  201. self.warning("failed to connect to service %s: %s",service,e)
  202. raise StepError("could not connect to any of the specified messaging services")
  203. def _connect(self, service):
  204. if self.connection is not None:
  205. self._close()
  206. toks = service.split(":")
  207. host = toks[0]
  208. try:
  209. port = int(toks[1])
  210. except:
  211. if self.ssl_options is None:
  212. port = 5672
  213. else:
  214. port = 5671
  215. if self.ssl_options is None:
  216. ssl = False
  217. login_method = "AMQPLAIN"
  218. else:
  219. ssl = self.ssl_options
  220. if "certfile" in ssl:
  221. login_method = "EXTERNAL"
  222. else:
  223. login_method = "AMQPLAIN"
  224. self.connection = amqp.Connection(host="%s:%d" % (host,port),
  225. login_method=login_method,
  226. userid=self.username,
  227. password=self.password,
  228. virtual_host=self.vhost,
  229. ssl=ssl,
  230. heartbeat=60)
  231. self.channel = self.connection.channel()
  232. def _selectService(self):
  233. if self.cur_service is None:
  234. self.cur_service = random.randint(0,len(self.services)-1) # pick a random one the first time
  235. else:
  236. self.cur_service = (self.cur_service+1) % len(self.services) # round robin after that
  237. return self.services[self.cur_service]
  238. def _close(self):
  239. if self.connection is None:
  240. return
  241. # call close in a thread in case it takes a long time (e.g. network outage)
  242. thread = _AmqpConnectionClose(self.connection)
  243. self.channel = None
  244. self.connection = None
  245. thread.start()
  246. thread.join(5)
  247. if thread.isAlive():
  248. self.warning("close didn't finish quickly")
  249. class _AmqpConnectionClose(threading.Thread):
  250. def __init__(self, connection):
  251. threading.Thread.__init__(self)
  252. self.daemon = True
  253. self.connection = connection
  254. def run(self):
  255. try:
  256. self.connection.close()
  257. except:
  258. pass
  259. ##############################################################################################################
  260. class HttpStep(PublishStep):
  261. def __init__(self):
  262. PublishStep.__init__(self)
  263. self.description = "publishes documents by PUTing or POSTing them"
  264. self.time_out = 10
  265. self._acceptParameter("host","The host name of the server to publish to",True)
  266. self._acceptParameter("port","The port to publish to",False)
  267. self._acceptParameter("path","The path part of the URL",True)
  268. self._acceptParameter("method","PUT or POST (default PUT)",False)
  269. def run(self):
  270. try:
  271. self.host = self.params["host"]
  272. except KeyError:
  273. raise StepError("host not specified")
  274. try:
  275. self.port = self.params["port"]
  276. except KeyError:
  277. self.port = 80
  278. try:
  279. self.method = self.params["method"]
  280. except KeyError:
  281. self.method = "PUT"
  282. try:
  283. self.path = self.params["path"]
  284. except ConfigParser.Error:
  285. raise StepError("path not specified")
  286. PublishStep.run(self)
  287. def _publish(self, representation):
  288. self.info("publishing %s",representation)
  289. connection = httplib.HTTPConnection(self.host+":"+str(self.port))
  290. connection.request(self.method,self.path,representation.get(),{"Content-Type": representation.mime_type})
  291. response = httplib.getresponse()
  292. if not (response.status == httplib.OK or response.status == httplib.CREATED):
  293. self.error("failed to '"+self.method+"' to http://"+self.host+":"+self.port+self.path+" - "+
  294. str(response.status)+" "+response.reason)
  295. connection.close()
  296. #######################################################################################################################
  297. # mostly for debugging
  298. class PrintStep(PublishStep):
  299. def __init__(self):
  300. PublishStep.__init__(self)
  301. self.description = "publishes documents by writing them to stdout"
  302. self.time_out = 5
  303. def _publish(self, representation):
  304. print(representation.get())
  305. #######################################################################################################################