PageRenderTime 69ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/FeExtractStats.py

https://gitlab.com/michaelchin/FeStat
Python | 173 lines | 169 code | 2 blank | 2 comment | 1 complexity | 6731d428db9e103bc6ae5c6ba7b3dab1 MD5 | raw file
  1. import re
  2. import FeStatUtils as Utils
  3. # C const-ish declaration in python...
  4. NUM_STATS = 8
  5. # Set-up regex patterns {
  6. patternBase = "<td>%s</td>"
  7. namePattern = patternBase % "([A-Z][a-z]*).*"
  8. classPattern = patternBase % "(.*)"
  9. statPattern = "\n" + patternBase % r"(\d*)"
  10. charMsPattern = "\n" + patternBase % r"(?:()|(\d)|.*(-\d).*)"
  11. charMsPat = re.compile( namePattern + "\n" +
  12. "\n".join( [ classPattern for i in range( NUM_STATS - 1 ) ] ) )
  13. charBsPat = re.compile( namePattern + "\n" + classPattern +
  14. "".join( [ statPattern for i in range( NUM_STATS + 1 ) ] ) )
  15. grPat = re.compile( namePattern + "".join( [ statPattern for i in range( NUM_STATS ) ] ) )
  16. # FeClasses require slightly different patterns due to extra formatting in some cases
  17. feClassNamePattern = "<td.*>((?:[A-Z][a-z]*.*)+)</td>"
  18. feClassStatPattern = "\n" + r"<td.*>(\d*)</td>"
  19. classPat = re.compile( feClassNamePattern +
  20. "".join( [ feClassStatPattern for i in range( NUM_STATS ) ] ) )
  21. # end regex patterns }
  22. # Open files for reading {
  23. filepath = "/Users/michaelchin/Dropbox/FeStat/data/%s"
  24. charBs = open( filepath % "char-bs.html", "r" )
  25. charGrH = open( filepath % "hoshido-char-gr.html", "r" )
  26. charMsH = open( filepath % "hoshido-char-ms.html", "r" )
  27. charGrN = open( filepath % "nohr-char-gr.html", "r" )
  28. charMsN = open( filepath % "nohr-char-ms.html", "r" )
  29. classBsH = open( filepath % "hoshido-class-bs.html", "r" )
  30. classMsH = open( filepath % "hoshido-class-ms.html", "r" )
  31. classGrH = open( filepath % "hoshido-class-gr.html", "r" )
  32. classBsN = open( filepath % "nohr-class-bs.html", "r" )
  33. classMsN = open( filepath % "nohr-class-ms.html", "r" )
  34. classGrN = open( filepath % "nohr-class-gr.html", "r" )
  35. # Parse files
  36. charBsMatch = charBsPat.findall( charBs.read() )
  37. charMsMatch = charMsPat.findall( charMsH.read() + charMsN.read() )
  38. charGrMatch = grPat.findall( charGrH.read() + charGrN.read() )
  39. # Conveniently, all Class-data can be parsed using the same pattern
  40. classBsMatch = classPat.findall( classBsH.read() + classBsN.read() )
  41. classGrMatch = classPat.findall( classGrH.read() + classGrN.read() )
  42. classMsMatch = classPat.findall( classMsH.read() + classMsN.read() )
  43. # Close all files
  44. charBs.close()
  45. charGrH.close()
  46. charMsH.close()
  47. charGrN.close()
  48. charMsN.close()
  49. classBsH.close()
  50. classMsH.close()
  51. classGrH.close()
  52. classBsN.close()
  53. classMsN.close()
  54. classGrN.close()
  55. # end data-file io }
  56. # Ensure output files are empty
  57. open( filepath % "../CharDb.py", "w" ).close()
  58. open( filepath % "../ClassDb.py", "w").close()
  59. # START CLASS_DB GEN {
  60. # Separate class base-stats, growth rates, and max-stats into buckets. From there, we can
  61. # process all three stats for each class all at once.
  62. classBucket = {}
  63. # Fill buckets
  64. for match in classBsMatch + classGrMatch + classMsMatch:
  65. name = Utils.processClassName( match[ 0 ] )
  66. if name not in classBucket:
  67. classBucket[ name ] = []
  68. classBucket[ name ].append( match[ 1: ] )
  69. classBucket[ "Butler" ] = classBucket[ "Maid" ]
  70. classBucket[ "ShrineMaiden" ] = classBucket[ "Monk" ]
  71. # Convert bucket to entries
  72. classDict = {}
  73. for ( name, bucket ) in classBucket.iteritems():
  74. # Python preserves list order (supposedly). If that's the case:
  75. # 0 = base stats
  76. # 1 = growth rates
  77. # 2 = max stats
  78. classDict[ name ] = Utils.classBaseStatInitializer % ( name, str( bucket[ 0 ] ) )
  79. classDict[ name ] += Utils.growthRateInitializer % ( str( bucket[ 1 ] ) )
  80. classDict[ name ] += Utils.maxStatInitializer % ( str( bucket[ 2 ] ) )
  81. if name in Utils.promotedClass:
  82. classDict[ name ] += Utils.promotedInitializer % 'True'
  83. else:
  84. classDict[ name ] += Utils.promotedInitializer % 'False'
  85. classDict[ name ] += "\n"
  86. # END CLASS_DB_GEN }
  87. # Write ClassDb.py
  88. classDb = open( filepath % "../ClassDb.py", "w" )
  89. classDb.write( "import FeStatUtils as Utils\n\n" )
  90. classDb.write( Utils.classParentClass )
  91. for entry in classDict.itervalues():
  92. classDb.write( entry )
  93. classDb.close()
  94. # START CHAR_DB GEN {
  95. # Generate character-classes by name
  96. charDict = {}
  97. # Start by processing base-stats, then mark character as 'processed'
  98. for bs in charBsMatch:
  99. # Position 0=name, 1=class, 2=baseLevel
  100. name = bs[ 0 ]
  101. # Hack: for some reason, Saizo is listed on the base-stats page as 'Saizou'. Correct that.
  102. if name == "Saizou":
  103. name = "Saizo"
  104. if name in charDict:
  105. # We've seen this character before; move on
  106. continue
  107. baseClass = Utils.classTranslate( bs[ 1 ] )
  108. baseLevel = bs[ 2 ]
  109. charDict[ name ] = Utils.baseStatInitializer % ( name, baseLevel, baseClass, str( bs[ 3: ] ) )
  110. # Next, process growth rates
  111. processedChars = []
  112. for gr in charGrMatch:
  113. name = gr[ 0 ]
  114. # Some characters are duplicate entries between the Hoshido and Nohrian pages. Don't process
  115. # them twice. Also, there are a list of characters not recruitable in the third path, and
  116. # since I'm currently only concerned with the third path, I'm going to ignore them. Let's be
  117. # real though: we already ignore them anyway.
  118. if ( name in processedChars ) or ( name in [ 'Izana', 'Yukimura' ] ) :
  119. continue
  120. else:
  121. processedChars.append( name )
  122. charDict[ name ] += Utils.growthRateInitializer % ( str( gr[ 1: ] ) )
  123. # Last, process max-stats. Regex parsing grabs html-tags for negative numbers, so we need to
  124. # process that out. numRe use is explained below...
  125. negRe = re.compile( r"(-\d)" )
  126. numRe = re.compile( r"(\d)" )
  127. # Reset processedChars list. We'll reuse it for the same purpose
  128. processedChars = []
  129. for ms in charMsMatch:
  130. name = ms[ 0 ]
  131. # We may have duplicate entries, or potentially no entries due to regex parser (compared
  132. # to the other two stats, the regex for this sucks)
  133. if ( name in processedChars ) or ( name not in charDict ):
  134. continue
  135. else:
  136. processedChars.append( name )
  137. def processItem( item ):
  138. negMatch = negRe.search( item )
  139. if negMatch:
  140. return int( negMatch.group() )
  141. # Some entries, like Scarlet's, have dirty characters next to the numbers. Process
  142. # those out with regex...
  143. numMatch = numRe.search( item )
  144. if numMatch:
  145. return int( numMatch.group() )
  146. assert not item
  147. return 0
  148. processedTuple = tuple( [ 0 ] + [ processItem( i ) for i in ms[ 1: ] ] )
  149. charDict[ name ] += Utils.maxStatInitializer % ( str( processedTuple ) )
  150. # END CHAR_DB GEN }
  151. # Write CharDb.py, which has some dependency on ClassDb
  152. charDb = open( filepath % "../CharDb.py", "w" )
  153. charDb.write( "import FeStatUtils as Utils\nimport ClassDb\n\n" )
  154. charDb.write( Utils.charParentClass )
  155. for entry in charDict.itervalues():
  156. if "maxStats_" not in entry:
  157. # For the characters with no default maxStats, initialize the attr to None so that
  158. # maxStats() will return something
  159. entry += "\t\tself.maxStats_ = None\n"
  160. # Add a newline to every entry because formatting
  161. entry += "\n"
  162. charDb.write( entry )
  163. charDb.close()