/lib/galaxy/model/migrate/versions/0076_fix_form_values_data_corruption.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 103 lines · 74 code · 7 blank · 22 comment · 16 complexity · 4d58e9c0c6d52fd0b82982a89f10a408 MD5 · raw file

  1. '''
  2. This migration script fixes the data corruption caused in the form_values
  3. table (content json field) by migrate script 65.
  4. '''
  5. from sqlalchemy import *
  6. from sqlalchemy.orm import *
  7. from migrate import *
  8. from migrate.changeset import *
  9. from sqlalchemy.exc import *
  10. import binascii
  11. from galaxy.util.json import from_json_string, to_json_string
  12. import logging
  13. log = logging.getLogger( __name__ )
  14. metadata = MetaData()
  15. def _sniffnfix_pg9_hex(value):
  16. """
  17. Sniff for and fix postgres 9 hex decoding issue
  18. """
  19. try:
  20. if value[0] == 'x':
  21. return binascii.unhexlify(value[1:])
  22. else:
  23. return value
  24. except Exception, ex:
  25. return value
  26. def upgrade(migrate_engine):
  27. metadata.bind = migrate_engine
  28. print __doc__
  29. metadata.reflect()
  30. cmd = "SELECT form_values.id as id, form_values.content as field_values, form_definition.fields as fdfields " \
  31. + " FROM form_definition, form_values " \
  32. + " WHERE form_values.form_definition_id=form_definition.id " \
  33. + " ORDER BY form_values.id"
  34. result = migrate_engine.execute( cmd )
  35. corrupted_rows = 0
  36. for row in result:
  37. # first check if loading the dict from the json succeeds
  38. # if that fails, it means that the content field is corrupted.
  39. try:
  40. field_values_dict = from_json_string( _sniffnfix_pg9_hex( str( row['field_values'] ) ) )
  41. except Exception, e:
  42. corrupted_rows = corrupted_rows + 1
  43. # content field is corrupted
  44. fields_list = from_json_string( _sniffnfix_pg9_hex( str( row['fdfields'] ) ) )
  45. field_values_str = _sniffnfix_pg9_hex( str( row['field_values'] ) )
  46. try:
  47. #Encoding errors? Just to be safe.
  48. print "Attempting to fix row %s" % row['id']
  49. print "Prior to replacement: %s" % field_values_str
  50. except:
  51. pass
  52. field_values_dict = {}
  53. # look for each field name in the values and extract its value (string)
  54. for index in range( len(fields_list) ):
  55. field = fields_list[index]
  56. field_name_key = '"%s": "' % field['name']
  57. field_index = field_values_str.find( field_name_key )
  58. if field_index == -1:
  59. # if the field name is not present the field values dict then
  60. # inform the admin that this form values cannot be fixed
  61. print "The 'content' field of row 'id' %i does not have the field '%s' in the 'form_values' table and could not be fixed by this migration script." % ( int( field['id'] ), field['name'] )
  62. else:
  63. # check if this is the last field
  64. if index == len( fields_list )-1:
  65. # since this is the last field, the value string lies between the
  66. # field name and the '"}' string at the end, hence len(field_values_str)-2
  67. value = field_values_str[ field_index+len( field_name_key ):len( field_values_str )-2 ]
  68. else:
  69. # if this is not the last field then the value string lies between
  70. # this field name and the next field name
  71. next_field = fields_list[index+1]
  72. next_field_index = field_values_str.find( '", "%s": "' % next_field['name'] )
  73. value = field_values_str[ field_index+len( field_name_key ):next_field_index ]
  74. # clean up the value string, escape the required quoutes and newline characters
  75. value = value.replace( "'", "\''" )\
  76. .replace( '"', '\\\\"' )\
  77. .replace( '\r', "\\\\r" )\
  78. .replace( '\n', "\\\\n" )\
  79. .replace( '\t', "\\\\t" )
  80. # add to the new values dict
  81. field_values_dict[ field['name'] ] = value
  82. # update the db
  83. json_values = to_json_string(field_values_dict)
  84. cmd = "UPDATE form_values SET content='%s' WHERE id=%i" %( json_values, int( row['id'] ) )
  85. migrate_engine.execute( cmd )
  86. try:
  87. print "Post replacement: %s" % json_values
  88. except:
  89. pass
  90. if corrupted_rows:
  91. print 'Fixed %i corrupted rows.' % corrupted_rows
  92. else:
  93. print 'No corrupted rows found.'
  94. def downgrade(migrate_engine):
  95. metadata.bind = migrate_engine
  96. pass