PageRenderTime 24ms CodeModel.GetById 10ms app.highlight 11ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/model/migrate/versions/0076_fix_form_values_data_corruption.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 103 lines | 74 code | 7 blank | 22 comment | 8 complexity | 4d58e9c0c6d52fd0b82982a89f10a408 MD5 | raw file
  1'''
  2This migration script fixes the data corruption caused in the form_values
  3table (content json field) by migrate script 65.
  4'''
  5from sqlalchemy import *
  6from sqlalchemy.orm import *
  7from migrate import *
  8from migrate.changeset import *
  9from sqlalchemy.exc import *
 10import binascii
 11
 12from galaxy.util.json import from_json_string, to_json_string
 13
 14import logging
 15log = logging.getLogger( __name__ )
 16
 17metadata = MetaData()
 18
 19def _sniffnfix_pg9_hex(value):
 20    """
 21    Sniff for and fix postgres 9 hex decoding issue
 22    """
 23    try:
 24        if value[0] == 'x':
 25            return binascii.unhexlify(value[1:])
 26        else:
 27            return value
 28    except Exception, ex:
 29        return value
 30
 31def upgrade(migrate_engine):
 32    metadata.bind = migrate_engine
 33    print __doc__
 34    metadata.reflect()
 35    cmd = "SELECT form_values.id as id, form_values.content as field_values, form_definition.fields as fdfields " \
 36          + " FROM form_definition, form_values " \
 37          + " WHERE form_values.form_definition_id=form_definition.id " \
 38          + " ORDER BY form_values.id"
 39    result = migrate_engine.execute( cmd )
 40    corrupted_rows = 0
 41    for row in result:
 42        # first check if loading the dict from the json succeeds
 43        # if that fails, it means that the content field is corrupted.
 44        try:
 45            field_values_dict = from_json_string( _sniffnfix_pg9_hex( str( row['field_values'] ) ) )
 46        except Exception, e:
 47            corrupted_rows = corrupted_rows + 1
 48            # content field is corrupted
 49            fields_list = from_json_string( _sniffnfix_pg9_hex( str( row['fdfields'] ) ) )
 50            field_values_str = _sniffnfix_pg9_hex( str( row['field_values'] ) )
 51            try:
 52                #Encoding errors?  Just to be safe.
 53                print "Attempting to fix row %s" % row['id']
 54                print "Prior to replacement: %s" % field_values_str
 55            except:
 56                pass
 57            field_values_dict = {}
 58            # look for each field name in the values and extract its value (string)
 59            for index in range( len(fields_list) ):
 60                field = fields_list[index]
 61                field_name_key = '"%s": "' % field['name']
 62                field_index = field_values_str.find( field_name_key )
 63                if field_index == -1:
 64                    # if the field name is not present the field values dict then
 65                    # inform the admin that this form values cannot be fixed
 66                    print "The 'content' field of row 'id' %i does not have the field '%s' in the 'form_values' table and could not be fixed by this migration script." % ( int( field['id'] ), field['name'] )
 67                else:
 68                    # check if this is the last field
 69                    if index == len( fields_list )-1:
 70                        # since this is the last field, the value string lies between the
 71                        # field name and the '"}' string at the end, hence len(field_values_str)-2
 72                        value = field_values_str[ field_index+len( field_name_key ):len( field_values_str )-2 ]
 73                    else:
 74                        # if this is not the last field then the value string lies between
 75                        # this field name and the next field name
 76                        next_field = fields_list[index+1]
 77                        next_field_index = field_values_str.find( '", "%s": "' % next_field['name'] )
 78                        value = field_values_str[ field_index+len( field_name_key ):next_field_index ]
 79                    # clean up the value string, escape the required quoutes and newline characters
 80                    value = value.replace( "'", "\''" )\
 81                                 .replace( '"', '\\\\"' )\
 82                                 .replace( '\r', "\\\\r" )\
 83                                 .replace( '\n', "\\\\n" )\
 84                                 .replace( '\t', "\\\\t" )
 85                    # add to the new values dict
 86                    field_values_dict[ field['name'] ] = value
 87            # update the db
 88            json_values = to_json_string(field_values_dict)
 89            cmd = "UPDATE form_values SET content='%s' WHERE id=%i" %( json_values, int( row['id'] ) )
 90            migrate_engine.execute( cmd )
 91            try:
 92                print "Post replacement: %s" % json_values
 93            except:
 94                pass
 95    if corrupted_rows:
 96        print 'Fixed %i corrupted rows.' % corrupted_rows
 97    else:
 98        print 'No corrupted rows found.'
 99
100def downgrade(migrate_engine):
101    metadata.bind = migrate_engine
102    pass
103