FeExtractStats.py | searchcode

/FeExtractStats.py

https://gitlab.com/michaelchin/FeStat
Python | 173 lines | 169 code | 2 blank | 2 comment | 1 complexity | 6731d428db9e103bc6ae5c6ba7b3dab1 MD5 | raw file

import re
import FeStatUtils as Utils

# C const-ish declaration in python...
# Number of stat columns; the repeat counts of the patterns below are derived from it.
NUM_STATS = 8

# Set-up regex patterns {
# Building blocks: each one describes a single "<td>...</td>" cell.
patternBase = "<td>%s</td>"
# Capitalised first word of the cell; whatever follows it inside the cell is not captured
namePattern = patternBase % "([A-Z][a-z]*).*"
# Whole cell contents, unfiltered
classPattern = patternBase % "(.*)"
# A cell holding zero or more digits, preceded by the line break that separates the cells
statPattern = "\n" + patternBase % r"(\d*)"
# NOTE(review): not referenced by any of the patterns compiled in this section
charMsPattern = "\n" + patternBase % r"(?:()|(\d)|.*(-\d).*)"

# Character max-stats: the name cell, then NUM_STATS - 1 free-form cells, one per line
charMsPat = re.compile( "\n".join( [ namePattern ] + [ classPattern ] * ( NUM_STATS - 1 ) ) )
# Character base-stats: the name cell, the class cell, then NUM_STATS + 1 numeric cells
charBsPat = re.compile( namePattern + "\n" + classPattern + statPattern * ( NUM_STATS + 1 ) )
# Growth rates: the name cell, then NUM_STATS numeric cells
grPat = re.compile( namePattern + statPattern * NUM_STATS )
# FeClasses require slightly different patterns due to extra formatting in some cases
feClassNamePattern = "<td.*>((?:[A-Z][a-z]*.*)+)</td>"
feClassStatPattern = "\n" + r"<td.*>(\d*)</td>"
classPat = re.compile( feClassNamePattern + feClassStatPattern * NUM_STATS )
# end regex patterns }

# Open files for reading {
# Template for every data file; "%s" is replaced by the file name.
filepath = "/Users/michaelchin/Dropbox/FeStat/data/%s"

def _readDataFiles( *fileNames ):
   """Return the contents of the named data files, concatenated in argument order.

   Each name is substituted into the module-level `filepath` template. Every file is
   opened inside a `with` block, so its handle is released even if reading raises.
   """
   contents = []
   for fileName in fileNames:
      with open( filepath % fileName, "r" ) as dataFile:
         contents.append( dataFile.read() )
   return "".join( contents )

# Parse files. Where a table is split across two pages, the Hoshido page is read first and
# the Nohr page is appended directly after it.
charBsMatch = charBsPat.findall( _readDataFiles( "char-bs.html" ) )
charMsMatch = charMsPat.findall( _readDataFiles( "hoshido-char-ms.html",
                                                 "nohr-char-ms.html" ) )
charGrMatch = grPat.findall( _readDataFiles( "hoshido-char-gr.html",
                                             "nohr-char-gr.html" ) )
# Conveniently, all Class-data can be parsed using the same pattern
classBsMatch = classPat.findall( _readDataFiles( "hoshido-class-bs.html",
                                                 "nohr-class-bs.html" ) )
classGrMatch = classPat.findall( _readDataFiles( "hoshido-class-gr.html",
                                                 "nohr-class-gr.html" ) )
classMsMatch = classPat.findall( _readDataFiles( "hoshido-class-ms.html",
                                                 "nohr-class-ms.html" ) )
# end data-file io }

# Ensure output files are empty
for staleOutput in ( "../CharDb.py", "../ClassDb.py" ):
   # Opening in "w" mode truncates (or creates) the file; nothing is written to it here.
   open( filepath % staleOutput, "w" ).close()

# START CLASS_DB GEN {
# Separate class base-stats, growth rates, and max-stats into buckets. From there, we can
# process all three stats for each class all at once.
classBucket = {}
# Fill buckets. The matches are walked in the order base stats, growth rates, max stats, so
# every class's bucket receives its stat tuples in that same order.
for match in classBsMatch + classGrMatch + classMsMatch:
   name = Utils.processClassName( match[ 0 ] )
   classBucket.setdefault( name, [] ).append( match[ 1: ] )
# Butler and ShrineMaiden share the bucket (the very same list object) of Maid and Monk.
# A missing "Maid" or "Monk" bucket raises KeyError here.
classBucket[ "Butler" ] = classBucket[ "Maid" ]
classBucket[ "ShrineMaiden" ] = classBucket[ "Monk" ]
# Convert bucket to entries
classDict = {}
# items() rather than the Python-2-only iteritems(), so the loop runs on Python 2 and 3.
for ( name, bucket ) in classBucket.items():
   # Lists keep insertion order, so with the fill order above:
   #    0 = base stats
   #    1 = growth rates
   #    2 = max stats
   # NOTE(review): this holds only if each class matched exactly once per stat page; a
   # duplicate match within one category would shift the later positions -- verify
   # against the data.
   entry = Utils.classBaseStatInitializer % ( name, str( bucket[ 0 ] ) )
   entry += Utils.growthRateInitializer % ( str( bucket[ 1 ] ) )
   entry += Utils.maxStatInitializer % ( str( bucket[ 2 ] ) )
   entry += Utils.promotedInitializer % ( 'True' if name in Utils.promotedClass else 'False' )
   classDict[ name ] = entry + "\n"
# END CLASS_DB_GEN }

# Write ClassDb.py
# The with-block closes the file even if one of the writes raises. values() is used rather
# than the Python-2-only itervalues() so the loop runs on Python 2 and 3.
with open( filepath % "../ClassDb.py", "w" ) as classDb:
   classDb.write( "import FeStatUtils as Utils\n\n" )
   classDb.write( Utils.classParentClass )
   for entry in classDict.values():
      classDb.write( entry )

# START CHAR_DB GEN {
# Generate character-classes by name
charDict = {}
# Start by processing base-stats. Only the first row seen for a given name is used.
for row in charBsMatch:
   # Row layout: 0 = name, 1 = class, 2 = base level, 3 onwards = the remaining columns
   # Hack: for some reason, Saizo is listed on the base-stats page as 'Saizou'. Correct that.
   name = "Saizo" if row[ 0 ] == "Saizou" else row[ 0 ]
   if name not in charDict:
      # First sighting of this character: build the opening part of its entry
      charDict[ name ] = Utils.baseStatInitializer % (
         name, row[ 2 ], Utils.classTranslate( row[ 1 ] ), str( row[ 3: ] ) )
# Next, process growth rates
processedChars = []
for gr in charGrMatch:
   name = gr[ 0 ]
   # Some characters are duplicate entries between the Hoshido and Nohrian pages. Don't process
   # them twice. Also, there are a list of characters not recruitable in the third path, and
   # since I'm currently only concerned with the third path, I'm going to ignore them. Let's be
   # real though: we already ignore them anyway.
   alreadySeen = name in processedChars
   if alreadySeen or name in ( 'Izana', 'Yukimura' ):
      continue
   processedChars.append( name )
   # The growth-rate text is appended to the character's existing entry; a name that has no
   # entry in charDict yet raises KeyError here.
   charDict[ name ] += Utils.growthRateInitializer % ( str( gr[ 1: ] ) )
# Last, process max-stats. Regex parsing grabs html-tags for negative numbers, so we need to
# process that out. numRe use is explained below...
# NOTE(review): both patterns capture a single digit only, so a multi-digit value would be
# cut down to its first digit -- presumably every max-stat value is one digit; verify.
negRe = re.compile( r"(-\d)" )
numRe = re.compile( r"(\d)" )

# Defined once, ahead of the loop, instead of being re-created for every character.
def processItem( item ):
   """Convert one raw max-stat cell, as captured by the regex, into an int.

   A "-<digit>" anywhere in the text wins and yields a negative int. Otherwise the first
   digit found is returned. A cell with neither must be empty and counts as 0.
   """
   # The negative form is tried first, before the plain-digit search gets a chance to
   # match a digit elsewhere in the cell text.
   negMatch = negRe.search( item )
   if negMatch:
      return int( negMatch.group() )
   # Some entries, like Scarlet's, have dirty characters next to the numbers. Process
   # those out with regex...
   numMatch = numRe.search( item )
   if numMatch:
      return int( numMatch.group() )
   # Sanity check: a cell without any digit is expected to be empty
   assert not item
   return 0

# Reset processedChars list. We'll reuse it for the same purpose
processedChars = []
for ms in charMsMatch:
   name = ms[ 0 ]
   # We may have duplicate entries, or potentially no entries due to regex parser (compared
   # to the other two stats, the regex for this sucks)
   if ( name in processedChars ) or ( name not in charDict ):
      continue
   processedChars.append( name )
   # A literal 0 goes in front of the converted cells -- presumably the slot for the one
   # stat these rows do not list; confirm against the other stat tuples.
   processedTuple = tuple( [ 0 ] + [ processItem( i ) for i in ms[ 1: ] ] )
   charDict[ name ] += Utils.maxStatInitializer % ( str( processedTuple ) )
# END CHAR_DB GEN }

# Write CharDb.py, which has some dependency on ClassDb
# The with-block closes the file even if one of the writes raises. values() is used rather
# than the Python-2-only itervalues() so the loop runs on Python 2 and 3.
with open( filepath % "../CharDb.py", "w" ) as charDb:
   charDb.write( "import FeStatUtils as Utils\nimport ClassDb\n\n" )
   charDb.write( Utils.charParentClass )
   for entry in charDict.values():
      if "maxStats_" not in entry:
         # For the characters with no default maxStats, initialize the attr to None so that
         # maxStats() will return something
         entry += "\t\tself.maxStats_ = None\n"
      # Add a newline to every entry because formatting
      entry += "\n"
      charDb.write( entry )