FeExtractStats.py - C const-ish declaration in python... Se…

/FeExtractStats.py

https://gitlab.com/michaelchin/FeStat · Python · 173 lines · 119 code · 10 blank · 44 comment · 27 complexity · 6731d428db9e103bc6ae5c6ba7b3dab1 MD5 · raw file

import re
import FeStatUtils as Utils

# C const-ish declaration in python...
NUM_STATS = 8

# Set-up regex patterns {
patternBase = "<td>%s</td>"
namePattern = patternBase % "([A-Z][a-z]*).*"
classPattern = patternBase % "(.*)"
statPattern = "\n" + patternBase % r"(\d*)"
charMsPattern = "\n" + patternBase % r"(?:()|(\d)|.*(-\d).*)"
charMsPat = re.compile( namePattern + "\n" +
                        "\n".join( [ classPattern for i in range( NUM_STATS - 1 ) ] ) )
charBsPat = re.compile( namePattern + "\n" + classPattern +
                        "".join( [ statPattern for i in range( NUM_STATS + 1 ) ] ) )
grPat = re.compile( namePattern + "".join( [ statPattern for i in range( NUM_STATS ) ] ) )
# FeClasses require slightly different patterns due to extra formatting in some cases
feClassNamePattern = "<td.*>((?:[A-Z][a-z]*.*)+)</td>"
feClassStatPattern = "\n" + r"<td.*>(\d*)</td>"
classPat = re.compile( feClassNamePattern +
                       "".join( [ feClassStatPattern for i in range( NUM_STATS ) ] ) )
# end regex patterns }

# Open files for reading {
filepath = "/Users/michaelchin/Dropbox/FeStat/data/%s"
charBs = open( filepath % "char-bs.html", "r" )
charGrH = open( filepath % "hoshido-char-gr.html", "r" )
charMsH = open( filepath % "hoshido-char-ms.html", "r" )
charGrN = open( filepath % "nohr-char-gr.html", "r" )
charMsN = open( filepath % "nohr-char-ms.html", "r" )
classBsH = open( filepath % "hoshido-class-bs.html", "r" )
classMsH = open( filepath % "hoshido-class-ms.html", "r" )
classGrH = open( filepath % "hoshido-class-gr.html", "r" )
classBsN = open( filepath % "nohr-class-bs.html", "r" )
classMsN = open( filepath % "nohr-class-ms.html", "r" )
classGrN = open( filepath % "nohr-class-gr.html", "r" )

# Parse files
charBsMatch = charBsPat.findall( charBs.read() )
charMsMatch = charMsPat.findall( charMsH.read() + charMsN.read() )
charGrMatch = grPat.findall( charGrH.read() + charGrN.read() )
# Conveniently, all Class-data can be parsed using the same pattern
classBsMatch = classPat.findall( classBsH.read() + classBsN.read() )
classGrMatch = classPat.findall( classGrH.read() + classGrN.read() )
classMsMatch = classPat.findall( classMsH.read() + classMsN.read() )

# Close all files
charBs.close()
charGrH.close()
charMsH.close()
charGrN.close()
charMsN.close()
classBsH.close()
classMsH.close()
classGrH.close()
classBsN.close()
classMsN.close()
classGrN.close()
# end data-file io }

# Ensure output files are empty
open( filepath % "../CharDb.py", "w" ).close()
open( filepath % "../ClassDb.py", "w").close()

# START CLASS_DB GEN {
# Separate class base-stats, growth rates, and max-stats into buckets. From there, we can
# process all three stats for each class all at once.
classBucket = {}
# Fill buckets
for match in classBsMatch + classGrMatch + classMsMatch:
   name = Utils.processClassName( match[ 0 ] )
   if name not in classBucket:
      classBucket[ name ] = []
   classBucket[ name ].append( match[ 1: ] )
classBucket[ "Butler" ] = classBucket[ "Maid" ]
classBucket[ "ShrineMaiden" ] = classBucket[ "Monk" ]
# Convert bucket to entries
classDict = {}
for ( name, bucket ) in classBucket.iteritems():
   # Python preserves list order (supposedly). If that's the case:
   #    0 = base stats
   #    1 = growth rates
   #    2 = max stats
   classDict[ name ] = Utils.classBaseStatInitializer % ( name, str( bucket[ 0 ] ) )
   classDict[ name ] += Utils.growthRateInitializer % ( str( bucket[ 1 ] ) )
   classDict[ name ] += Utils.maxStatInitializer % ( str( bucket[ 2 ] ) )
   if name in Utils.promotedClass:
      classDict[ name ] += Utils.promotedInitializer % 'True'
   else:
      classDict[ name ] += Utils.promotedInitializer % 'False'
   classDict[ name ] += "\n"
# END CLASS_DB_GEN }

# Write ClassDb.py
classDb = open( filepath % "../ClassDb.py", "w" )
classDb.write( "import FeStatUtils as Utils\n\n" )
classDb.write( Utils.classParentClass )
for entry in classDict.itervalues():
   classDb.write( entry )
classDb.close()

# START CHAR_DB GEN {
# Generate character-classes by name
charDict = {}
# Start by processing base-stats, then mark character as 'processed'
for bs in charBsMatch:
   # Position 0=name, 1=class, 2=baseLevel
   name = bs[ 0 ]
   # Hack: for some reason, Saizo is listed on the base-stats page as 'Saizou'. Correct that.
   if name == "Saizou":
      name = "Saizo"
   if name in charDict:
      # We've seen this character before; move on
      continue
   baseClass = Utils.classTranslate( bs[ 1 ] )
   baseLevel = bs[ 2 ]
   charDict[ name ] = Utils.baseStatInitializer % ( name, baseLevel, baseClass, str( bs[ 3: ] ) )
# Next, process growth rates
processedChars = []
for gr in charGrMatch:
   name = gr[ 0 ]
   # Some characters are duplicate entries between the Hoshido and Nohrian pages. Don't process
   # them twice. Also, there are a list of characters not recruitable in the third path, and
   # since I'm currently only concerned with the third path, I'm going to ignore them. Let's be
   # real though: we already ignore them anyway.
   if ( name in processedChars ) or ( name in [ 'Izana', 'Yukimura' ] ) :
      continue
   else:
      processedChars.append( name )
   charDict[ name ] += Utils.growthRateInitializer % ( str( gr[ 1: ] ) )
# Last, process max-stats. Regex parsing grabs html-tags for negative numbers, so we need to
# process that out. numRe use is explained below...
negRe = re.compile( r"(-\d)" )
numRe = re.compile( r"(\d)" )
# Reset processedChars list. We'll reuse it for the same purpose
processedChars = []
for ms in charMsMatch:
   name = ms[ 0 ]
   # We may have duplicate entries, or potentially no entries due to regex parser (compared
   # to the other two stats, the regex for this sucks)
   if ( name in processedChars ) or ( name not in charDict ):
      continue
   else:
      processedChars.append( name )
   def processItem( item ):
      negMatch = negRe.search( item )
      if negMatch:
         return int( negMatch.group() )
      # Some entries, like Scarlet's, have dirty characters next to the numbers. Process
      # those out with regex...
      numMatch = numRe.search( item )
      if numMatch:
         return int( numMatch.group() )
      assert not item
      return 0
   processedTuple = tuple( [ 0 ] + [ processItem( i ) for i in ms[ 1: ] ] )
   charDict[ name ] += Utils.maxStatInitializer % ( str( processedTuple ) )
# END CHAR_DB GEN }

# Write CharDb.py, which has some dependency on ClassDb
charDb = open( filepath % "../CharDb.py", "w" )
charDb.write( "import FeStatUtils as Utils\nimport ClassDb\n\n" )
charDb.write( Utils.charParentClass )
for entry in charDict.itervalues():
   if "maxStats_" not in entry:
      # For the characters with no default maxStats, initialize the attr to None so that
      # maxStats() will return something
      entry += "\t\tself.maxStats_ = None\n"
   # Add a newline to every entry because formatting
   entry += "\n"
   charDb.write( entry )
charDb.close()
Tech Fingerprint

Standard Library: String & Text
Alerts (15)

'open(' Use 'with open()' to ensure Files are properly closed
27 28 29 30 31 32 33 34 35 36 37 63 64 96
'def' Ensure functions have docstrings for documentation
146