PageRenderTime 57ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/modules/miscutil/lib/dbdump.py

https://github.com/chokribr/invenio-1
Python | 374 lines | 259 code | 23 blank | 92 comment | 25 complexity | 553cf8565bec1f38ddc2e5a30b06db47 MD5 | raw file
Possible License(s): GPL-2.0
  1. # -*- coding: utf-8 -*-
  2. ##
  3. ## This file is part of Invenio.
  4. ## Copyright (C) 2009, 2010, 2011, 2012 CERN.
  5. ##
  6. ## Invenio is free software; you can redistribute it and/or
  7. ## modify it under the terms of the GNU General Public License as
  8. ## published by the Free Software Foundation; either version 2 of the
  9. ## License, or (at your option) any later version.
  10. ##
  11. ## Invenio is distributed in the hope that it will be useful, but
  12. ## WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. ## General Public License for more details.
  15. ##
  16. ## You should have received a copy of the GNU General Public License
  17. ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
  18. ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  19. """
  20. Invenio DB dumper.
  21. """
  22. import os
  23. import re
  24. import time
  25. from invenio.config import CFG_LOGDIR, CFG_PATH_MYSQL, CFG_PATH_GZIP
  26. from invenio.dbquery import CFG_DATABASE_HOST, \
  27. CFG_DATABASE_USER, \
  28. CFG_DATABASE_PASS, \
  29. CFG_DATABASE_NAME, \
  30. CFG_DATABASE_PORT, \
  31. CFG_DATABASE_SLAVE, \
  32. get_connection_for_dump_on_slave, \
  33. run_sql
  34. from invenio.bibtask import task_init, \
  35. write_message, \
  36. task_set_option, \
  37. task_get_option, \
  38. task_update_progress, \
  39. task_get_task_param, \
  40. task_low_level_submission
  41. from invenio.shellutils import run_shell_command, \
  42. escape_shell_arg
  43. def get_table_names(value):
  44. """
  45. Get table names of the tables matching the given regular expressions
  46. @param option: list of regular expressions
  47. @return: list of strings
  48. """
  49. rex = re.compile(value)
  50. return [row[0] for row in run_sql("SHOW TABLES") if rex.search(row[0])]
  51. def _delete_old_dumps(dirname, filename, number_to_keep):
  52. """
  53. Look for files in DIRNAME directory starting with FILENAME
  54. pattern. Delete up to NUMBER_TO_KEEP files (when sorted
  55. alphabetically, which is equal to sorted by date). Useful to
  56. prune old dump files.
  57. """
  58. files = [x for x in os.listdir(dirname) if x.startswith(filename)]
  59. files.sort()
  60. for afile in files[:-number_to_keep]:
  61. write_message("... deleting %s" % dirname + os.sep + afile)
  62. os.remove(dirname + os.sep + afile)
  63. def check_slave_is_up(connection=None):
  64. """Raise an StandardError in case the slave is not correctly up."""
  65. if connection is None:
  66. connection = get_connection_for_dump_on_slave()
  67. res = run_sql("SHOW SLAVE STATUS", with_dict=True, connection=connection)
  68. if res[0]['Slave_IO_Running'] != 'Yes':
  69. raise StandardError("Slave_IO_Running is not set to 'Yes'")
  70. if res[0]['Slave_SQL_Running'] != 'Yes':
  71. raise StandardError("Slave_SQL_Running is not set to 'Yes'")
  72. def check_slave_is_down(connection=None):
  73. """Raise an StandardError in case the slave is not correctly down."""
  74. if connection is None:
  75. connection = get_connection_for_dump_on_slave()
  76. res = run_sql("SHOW SLAVE STATUS", with_dict=True, connection=connection)
  77. if res[0]['Slave_SQL_Running'] != 'No':
  78. raise StandardError("Slave_SQL_Running is not set to 'No'")
  79. def detach_slave(connection=None):
  80. """Detach the slave."""
  81. if connection is None:
  82. connection = get_connection_for_dump_on_slave()
  83. run_sql("STOP SLAVE SQL_THREAD", connection=connection)
  84. check_slave_is_down(connection)
  85. def attach_slave(connection=None):
  86. """Attach the slave."""
  87. if connection is None:
  88. connection = get_connection_for_dump_on_slave()
  89. run_sql("START SLAVE", connection=connection)
  90. check_slave_is_up(connection)
  91. def check_slave_is_in_consistent_state(connection=None):
  92. """
  93. Check if the slave is already aware that dbdump task is running.
  94. dbdump being a monotask, guarantee that no other task is currently
  95. running and it's hence safe to detach the slave and start the
  96. actual dump.
  97. """
  98. if connection is None:
  99. connection = get_connection_for_dump_on_slave()
  100. i = 0
  101. ## Let's take the current status of dbdump (e.g. RUNNING, ABOUT TO STOP, etc.)...
  102. current_status = run_sql("SELECT status FROM schTASK WHERE id=%s", (task_get_task_param('task_id'), ))[0][0]
  103. while True:
  104. if i == 10:
  105. ## Timeout!!
  106. raise StandardError("The slave seems not to pick up with the master")
  107. ## ...and let's see if it matches with what the slave sees.
  108. if run_sql("SELECT status FROM schTASK WHERE id=%s AND status=%s", (task_get_task_param('task_id'), current_status), connection=connection):
  109. ## Bingo!
  110. return
  111. time.sleep(3)
  112. i += 1
  113. def dump_database(dump_path, host=CFG_DATABASE_HOST, port=CFG_DATABASE_PORT, \
  114. user=CFG_DATABASE_USER, passw=CFG_DATABASE_PASS, \
  115. name=CFG_DATABASE_NAME, params=None, compress=False, \
  116. ignore_tables=None):
  117. """
  118. Dump Invenio database into SQL file located at DUMP_PATH.
  119. Will perform the command to mysqldump with the given host configuration
  120. and user credentials.
  121. Optional mysqldump parameters can also be passed. Otherwise, a default
  122. set of parameters will be used.
  123. @param dump_path: path on the filesystem to save the dump to.
  124. @type dump_path: string
  125. @param host: hostname of mysql database node to connect to.
  126. @type host: string
  127. @param port: port of mysql database node to connect to
  128. @type port: string
  129. @param user: username to connect with
  130. @type user: string
  131. @param passw: password to connect to with
  132. @type passw: string
  133. @param name: name of mysql database node to dump
  134. @type name: string
  135. @param params: command line parameters to pass to mysqldump. Optional.
  136. @type params: string
  137. @param compress: should the dump be compressed through gzip?
  138. @type compress: bool
  139. @param ignore_tables: list of tables to ignore in the dump
  140. @type ignore: list of string
  141. """
  142. write_message("... writing %s" % (dump_path,))
  143. partial_dump_path = dump_path + ".part"
  144. # Is mysqldump installed or in the right path?
  145. cmd_prefix = CFG_PATH_MYSQL + 'dump'
  146. if not os.path.exists(cmd_prefix):
  147. raise StandardError("%s is not installed." % (cmd_prefix))
  148. if not params:
  149. # No parameters set, lets use the default ones.
  150. params = " --skip-opt --add-drop-table --add-locks --create-options" \
  151. " --quick --extended-insert --set-charset --disable-keys" \
  152. " --lock-tables=false --max_allowed_packet=2G "
  153. if ignore_tables:
  154. params += " ".join([escape_shell_arg("--ignore-table=%s.%s" % (CFG_DATABASE_NAME, table)) for table in ignore_tables])
  155. dump_cmd = "%s %s " \
  156. " --host=%s --port=%s --user=%s --password=%s %s" % \
  157. (cmd_prefix, \
  158. params, \
  159. escape_shell_arg(host), \
  160. escape_shell_arg(str(port)), \
  161. escape_shell_arg(user), \
  162. escape_shell_arg(passw), \
  163. escape_shell_arg(name))
  164. if compress:
  165. dump_cmd = "%s | %s -cf; exit ${PIPESTATUS[0]}" % \
  166. (dump_cmd, \
  167. CFG_PATH_GZIP)
  168. dump_cmd = "bash -c %s" % (escape_shell_arg(dump_cmd),)
  169. write_message(dump_cmd, verbose=2)
  170. exit_code, stdout, stderr = run_shell_command(dump_cmd, None, partial_dump_path)
  171. if exit_code:
  172. raise StandardError("ERROR: mysqldump exit code is %s. stderr: %s stdout: %s" % \
  173. (repr(exit_code), \
  174. repr(stderr), \
  175. repr(stdout)))
  176. else:
  177. os.rename(partial_dump_path, dump_path)
  178. write_message("... completed writing %s" % (dump_path,))
  179. def _dbdump_elaborate_submit_param(key, value, dummyopts, dummyargs):
  180. """
  181. Elaborate task submission parameter. See bibtask's
  182. task_submit_elaborate_specific_parameter_fnc for help.
  183. """
  184. if key in ('-n', '--number'):
  185. try:
  186. task_set_option('number', int(value))
  187. except ValueError:
  188. raise StandardError("ERROR: Number '%s' is not integer." % (value,))
  189. elif key in ('-o', '--output'):
  190. if os.path.isdir(value):
  191. task_set_option('output', value)
  192. else:
  193. raise StandardError("ERROR: Output '%s' is not a directory." % \
  194. (value,))
  195. elif key in ('--params',):
  196. task_set_option('params', value)
  197. elif key in ('--compress',):
  198. if not CFG_PATH_GZIP or (CFG_PATH_GZIP and not os.path.exists(CFG_PATH_GZIP)):
  199. raise StandardError("ERROR: No valid gzip path is defined.")
  200. task_set_option('compress', True)
  201. elif key in ('-S', '--slave'):
  202. if value:
  203. task_set_option('slave', value)
  204. else:
  205. if not CFG_DATABASE_SLAVE:
  206. raise StandardError("ERROR: No slave defined.")
  207. task_set_option('slave', CFG_DATABASE_SLAVE)
  208. elif key in ('--dump-on-slave-helper', ):
  209. task_set_option('dump_on_slave_helper_mode', True)
  210. elif key in ('--ignore-tables',):
  211. try:
  212. re.compile(value)
  213. task_set_option("ignore_tables", value)
  214. except re.error:
  215. raise StandardError, "ERROR: Passed string: '%s' is not a valid regular expression." % value
  216. else:
  217. return False
  218. return True
  219. def _dbdump_run_task_core():
  220. """
  221. Run DB dumper core stuff.
  222. Note: do not use task_can_sleep() stuff here because we don't want
  223. other tasks to interrupt us while we are dumping the DB content.
  224. """
  225. # read params:
  226. host = CFG_DATABASE_HOST
  227. port = CFG_DATABASE_PORT
  228. connection = None
  229. try:
  230. if task_get_option('slave') and not task_get_option('dump_on_slave_helper_mode'):
  231. connection = get_connection_for_dump_on_slave()
  232. write_message("Dump on slave requested")
  233. write_message("... checking if slave is well up...")
  234. check_slave_is_up(connection)
  235. write_message("... checking if slave is in consistent state...")
  236. check_slave_is_in_consistent_state(connection)
  237. write_message("... detaching slave database...")
  238. detach_slave(connection)
  239. write_message("... scheduling dump on slave helper...")
  240. helper_arguments = []
  241. if task_get_option("number"):
  242. helper_arguments += ["--number", str(task_get_option("number"))]
  243. if task_get_option("output"):
  244. helper_arguments += ["--output", str(task_get_option("output"))]
  245. if task_get_option("params"):
  246. helper_arguments += ["--params", str(task_get_option("params"))]
  247. if task_get_option("ignore_tables"):
  248. helper_arguments += ["--ignore-tables", str(task_get_option("ignore_tables"))]
  249. if task_get_option("compress"):
  250. helper_arguments += ["--compress"]
  251. if task_get_option("slave"):
  252. helper_arguments += ["--slave", str(task_get_option("slave"))]
  253. helper_arguments += ['-N', 'slavehelper', '--dump-on-slave-helper']
  254. task_id = task_low_level_submission('dbdump', task_get_task_param('user'), '-P4', *helper_arguments)
  255. write_message("Slave scheduled with ID %s" % task_id)
  256. task_update_progress("DONE")
  257. return True
  258. elif task_get_option('dump_on_slave_helper_mode'):
  259. write_message("Dumping on slave mode")
  260. connection = get_connection_for_dump_on_slave()
  261. write_message("... checking if slave is well down...")
  262. check_slave_is_down(connection)
  263. host = CFG_DATABASE_SLAVE
  264. task_update_progress("Reading parameters")
  265. write_message("Reading parameters started")
  266. output_dir = task_get_option('output', CFG_LOGDIR)
  267. output_num = task_get_option('number', 5)
  268. params = task_get_option('params', None)
  269. compress = task_get_option('compress', False)
  270. slave = task_get_option('slave', False)
  271. ignore_tables = task_get_option('ignore_tables', None)
  272. if ignore_tables:
  273. ignore_tables = get_table_names(ignore_tables)
  274. else:
  275. ignore_tables = None
  276. output_file_suffix = task_get_task_param('task_starting_time')
  277. output_file_suffix = output_file_suffix.replace(' ', '_') + '.sql'
  278. if compress:
  279. output_file_suffix = "%s.gz" % (output_file_suffix,)
  280. write_message("Reading parameters ended")
  281. # make dump:
  282. task_update_progress("Dumping database")
  283. write_message("Database dump started")
  284. if slave:
  285. output_file_prefix = 'slave-%s-dbdump-' % (CFG_DATABASE_NAME,)
  286. else:
  287. output_file_prefix = '%s-dbdump-' % (CFG_DATABASE_NAME,)
  288. output_file = output_file_prefix + output_file_suffix
  289. dump_path = output_dir + os.sep + output_file
  290. dump_database(dump_path, \
  291. host=host,
  292. port=port,
  293. params=params, \
  294. compress=compress, \
  295. ignore_tables=ignore_tables)
  296. write_message("Database dump ended")
  297. finally:
  298. if connection and task_get_option('dump_on_slave_helper_mode'):
  299. write_message("Reattaching slave")
  300. attach_slave(connection)
  301. # prune old dump files:
  302. task_update_progress("Pruning old dump files")
  303. write_message("Pruning old dump files started")
  304. _delete_old_dumps(output_dir, output_file_prefix, output_num)
  305. write_message("Pruning old dump files ended")
  306. # we are done:
  307. task_update_progress("Done.")
  308. return True
  309. def main():
  310. """Main that construct all the bibtask."""
  311. task_init(authorization_action='rundbdump',
  312. authorization_msg="DB Dump Task Submission",
  313. help_specific_usage="""\
  314. -o, --output=DIR Output directory. [default=%s]
  315. -n, --number=NUM Keep up to NUM previous dump files. [default=5]
  316. --params=PARAMS Specify your own mysqldump parameters. Optional.
  317. --compress Compress dump directly into gzip.
  318. -S, --slave=HOST Perform the dump from a slave, if no host use CFG_DATABASE_SLAVE.
  319. --ignore-tables=regex Ignore tables matching the given regular expression
  320. Examples:
  321. $ dbdump --ignore-tables '^(idx|rnk)'
  322. $ dbdump -n3 -o/tmp -s1d -L 02:00-04:00
  323. """ % CFG_LOGDIR,
  324. specific_params=("n:o:p:S:",
  325. ["number=", "output=", "params=", "slave=", "compress", 'ignore-tables=', "dump-on-slave-helper"]),
  326. task_submit_elaborate_specific_parameter_fnc=_dbdump_elaborate_submit_param,
  327. task_run_fnc=_dbdump_run_task_core)
  328. if __name__ == '__main__':
  329. main()