/FeExtractStats.py
Python | 173 lines | 169 code | 2 blank | 2 comment | 1 complexity | 6731d428db9e103bc6ae5c6ba7b3dab1 MD5 | raw file
- import re
- import FeStatUtils as Utils
# Set-up regex patterns {
# Number of per-unit stats in each table row (C const-ish declaration).
NUM_STATS = 8

patternBase = "<td>%s</td>"
# Name cell: capture the leading capitalized word, tolerate trailing junk.
namePattern = patternBase % "([A-Z][a-z]*).*"
# Free-form cell (class names, dirty max-stat cells).
classPattern = patternBase % "(.*)"
# Numeric stat cell on its own line.
statPattern = "\n" + patternBase % r"(\d*)"
# NOTE(review): charMsPattern appears unused below (charMsPat is built from
# classPattern instead) -- kept in case it is referenced elsewhere; verify
# before deleting.
charMsPattern = "\n" + patternBase % r"(?:()|(\d)|.*(-\d).*)"

# Character max-stats row: name cell plus NUM_STATS-1 free-form cells
# (the cells are too dirty for the numeric pattern; cleaned up later).
charMsPat = re.compile( namePattern + "\n" +
        "\n".join( [ classPattern ] * ( NUM_STATS - 1 ) ) )
# Character base-stats row: name, class, then NUM_STATS+1 numeric cells
# (base level followed by the stats).
charBsPat = re.compile( namePattern + "\n" + classPattern +
        statPattern * ( NUM_STATS + 1 ) )
# Growth-rate row: name followed by NUM_STATS numeric cells.
grPat = re.compile( namePattern + statPattern * NUM_STATS )

# FeClasses require slightly different patterns due to extra formatting in some cases
feClassNamePattern = "<td.*>((?:[A-Z][a-z]*.*)+)</td>"
feClassStatPattern = "\n" + r"<td.*>(\d*)</td>"
classPat = re.compile( feClassNamePattern + feClassStatPattern * NUM_STATS )
# end regex patterns }
# Open files for reading {
# All scraped data files live under this directory; the "%s" slot selects the
# individual file.  NOTE(review): hard-coded absolute home-directory path --
# this script only runs on the original author's machine as-is.
filepath = "/Users/michaelchin/Dropbox/FeStat/data/%s"

def readData( fname ):
    # Read one data file and return its full contents.  The with-block
    # guarantees the handle is closed even if the read raises, replacing the
    # previous open-everything / close-everything bookkeeping.
    with open( filepath % fname, "r" ) as f:
        return f.read()

# Parse files.  Character data needs three different patterns...
charBsMatch = charBsPat.findall( readData( "char-bs.html" ) )
charMsMatch = charMsPat.findall( readData( "hoshido-char-ms.html" ) +
        readData( "nohr-char-ms.html" ) )
charGrMatch = grPat.findall( readData( "hoshido-char-gr.html" ) +
        readData( "nohr-char-gr.html" ) )
# ...but conveniently, all Class-data can be parsed using the same pattern.
classBsMatch = classPat.findall( readData( "hoshido-class-bs.html" ) +
        readData( "nohr-class-bs.html" ) )
classGrMatch = classPat.findall( readData( "hoshido-class-gr.html" ) +
        readData( "nohr-class-gr.html" ) )
classMsMatch = classPat.findall( readData( "hoshido-class-ms.html" ) +
        readData( "nohr-class-ms.html" ) )
# end data-file io }

# Ensure output files are empty (opening for write truncates).
open( filepath % "../CharDb.py", "w" ).close()
open( filepath % "../ClassDb.py", "w" ).close()
# START CLASS_DB GEN {
# Separate class base-stats, growth rates, and max-stats into buckets keyed by
# normalized class name, so all three stat tuples for a class are processed at
# once.
classBucket = {}
# Fill buckets.
for match in classBsMatch + classGrMatch + classMsMatch:
    name = Utils.processClassName( match[ 0 ] )
    classBucket.setdefault( name, [] ).append( match[ 1: ] )
# Butler/Maid and ShrineMaiden/Monk share stats; alias the missing entries
# onto their scraped counterparts.
classBucket[ "Butler" ] = classBucket[ "Maid" ]
classBucket[ "ShrineMaiden" ] = classBucket[ "Monk" ]

# Convert buckets to generated-source entries.  Because the match lists are
# concatenated in Bs + Gr + Ms order above, each bucket's append order is
# guaranteed to be:
#   0 = base stats
#   1 = growth rates
#   2 = max stats
classDict = {}
# .items() instead of Py2-only .iteritems(): identical behavior here, and it
# keeps the script portable to Python 3.
for ( name, bucket ) in classDict.items() if False else classBucket.items():
    classDict[ name ] = Utils.classBaseStatInitializer % ( name, str( bucket[ 0 ] ) )
    classDict[ name ] += Utils.growthRateInitializer % ( str( bucket[ 1 ] ) )
    classDict[ name ] += Utils.maxStatInitializer % ( str( bucket[ 2 ] ) )
    # Flag promoted (advanced) classes.
    promoted = 'True' if name in Utils.promotedClass else 'False'
    classDict[ name ] += Utils.promotedInitializer % promoted
    classDict[ name ] += "\n"
# END CLASS_DB_GEN }
# Write ClassDb.py.  The with-block guarantees the file is flushed and closed
# even if a write raises; .values() replaces Py2-only .itervalues().
with open( filepath % "../ClassDb.py", "w" ) as classDb:
    classDb.write( "import FeStatUtils as Utils\n\n" )
    classDb.write( Utils.classParentClass )
    for entry in classDict.values():
        classDb.write( entry )
# START CHAR_DB GEN {
# Generate character-class source entries keyed by character name.
charDict = {}
# First pass: base stats.  Row layout: 0=name, 1=class, 2=baseLevel, 3+=stats.
for row in charBsMatch:
    # Hack: for some reason, Saizo is listed on the base-stats page as
    # 'Saizou'.  Correct that.
    name = "Saizo" if row[ 0 ] == "Saizou" else row[ 0 ]
    # Duplicate rows for a character are ignored; the first one wins.
    if name not in charDict:
        baseClass = Utils.classTranslate( row[ 1 ] )
        baseLevel = row[ 2 ]
        charDict[ name ] = Utils.baseStatInitializer % (
                name, baseLevel, baseClass, str( row[ 3: ] ) )
# Next, process growth rates.
# Some characters are duplicate entries between the Hoshido and Nohrian pages;
# process each name only once.  Izana and Yukimura are not recruitable in the
# third path (the only one this tool currently targets), so skip them outright.
processedChars = set()
for gr in charGrMatch:
    name = gr[ 0 ]
    if ( name in processedChars ) or ( name in ( 'Izana', 'Yukimura' ) ):
        continue
    processedChars.add( name )
    # Guard against names that never received a base-stat entry: += on a
    # missing key would raise KeyError.  The max-stat pass in the original
    # already had this protection; this pass did not.
    if name not in charDict:
        continue
    charDict[ name ] += Utils.growthRateInitializer % ( str( gr[ 1: ] ) )
# Last, process max-stats.  Regex parsing grabs html-tags for negative numbers,
# so we need to process that out; numRe use is explained in processItem.
negRe = re.compile( r"(-\d)" )
numRe = re.compile( r"(\d)" )

def processItem( item ):
    # Convert one scraped max-stat cell to an int.  Hoisted out of the loop:
    # the original re-defined this function on every iteration.
    # Negative modifiers come wrapped in extra markup; look for "-<digit>"
    # first.  NOTE(review): only a single digit is captured, so a hypothetical
    # "-10" would parse as -1 -- confirm the data never has two-digit values.
    negMatch = negRe.search( item )
    if negMatch:
        return int( negMatch.group() )
    # Some entries, like Scarlet's, have dirty characters next to the numbers.
    # Process those out by taking the first bare digit.
    numMatch = numRe.search( item )
    if numMatch:
        return int( numMatch.group() )
    # Anything else must be an empty cell; treat it as 0.  (Assert is a dev
    # sanity check only -- it is stripped under python -O.)
    assert not item
    return 0

# Reset processedChars; reused for the same duplicate-tracking purpose.
processedChars = []
for ms in charMsMatch:
    name = ms[ 0 ]
    # We may have duplicate entries, or names with no charDict entry at all --
    # compared to the other two stats, the max-stat regex is the flakiest.
    if ( name in processedChars ) or ( name not in charDict ):
        continue
    processedChars.append( name )
    # Leading 0 pads the tuple so stat indices line up with the other
    # initializers.
    processedTuple = tuple( [ 0 ] + [ processItem( i ) for i in ms[ 1: ] ] )
    charDict[ name ] += Utils.maxStatInitializer % ( str( processedTuple ) )
# END CHAR_DB GEN }
- # END CHAR_DB GEN }
- # Write CharDb.py, which has some dependency on ClassDb
- charDb = open( filepath % "../CharDb.py", "w" )
- charDb.write( "import FeStatUtils as Utils\nimport ClassDb\n\n" )
- charDb.write( Utils.charParentClass )
- for entry in charDict.itervalues():
- if "maxStats_" not in entry:
- # For the characters with no default maxStats, initialize the attr to None so that
- # maxStats() will return something
- entry += "\t\tself.maxStats_ = None\n"
- # Add a newline to every entry because formatting
- entry += "\n"
- charDb.write( entry )
- charDb.close()