/scripts/common_stats.py

https://github.com/erjiang/polyworld
import os
import re
import sys
import datalib
import glob

STAT_TYPES = ['agents', 'food', 'born', 'created', 'died', 'LifeSpan', 'CurNeurons', 'NeurGroups', 'CurNeurGroups', 'Domain[domain_index]FP[foodpatch_index][stat_index]']

FILENAME_DATALIB = 'datalib.txt'

####################################################################################
###
### FUNCTION get_names()
###
####################################################################################
def get_names(types):
    return types

####################################################################################
###
### FUNCTION relpath_stats()
###
####################################################################################
def relpath_stats():
    return os.path.join('stats', FILENAME_DATALIB)

####################################################################################
###
### FUNCTION path_stats()
###
####################################################################################
def path_stats(path_run):
    return os.path.join(path_run, relpath_stats())

####################################################################################
###
### FUNCTION path_run_from_stats()
###
####################################################################################
def path_run_from_stats(path_stats, classification, dataset):
    suffix = relpath_stats()
    return path_stats[:-(len(suffix) + 1)]

####################################################################################
###
### FUNCTION parse_stats()
###
####################################################################################
def parse_stats(run_paths, classification = None, dataset = None, types = None, run_as_key = False, quiet = False):
    # make sure the datalib files exist
    for path in run_paths:
        __get_stats( path, quiet )

    # parse the stats for all the runs
    tables = datalib.parse_all( map(lambda x: path_stats( x ),
                                    run_paths),
                                types,
                                datalib.REQUIRED,
                                keycolname = 'step' )

    if run_as_key:
        # modify the map to use the run dir as key, not the datalib file path
        tables = dict( [(path_run_from_stats( x[0],
                                              classification,
                                              dataset),
                         x[1])
                        for x in tables.items()] )

    return tables

def __get_stats( path_run, quiet = False ):
    # Reuse the run's datalib file if it has already been generated; otherwise
    # build it from the individual stat.* files and write it out.
    path_datalib = os.path.join( path_run, 'stats', FILENAME_DATALIB )
    if os.path.exists( path_datalib ):
        return datalib.parse( path_datalib,
                              keycolname = 'step' )

    if not quiet:
        # This can take a long time, so let the user know we're not hung
        print 'Converting stats files into datalib format for run', path_run

    tables = __create_tables( path_run, quiet )

    paths = glob.glob( os.path.join(path_run, 'stats', 'stat.*') )
    paths.sort( key = __path2step )

    for path in paths:
        __add_row( tables, path, quiet )

    if not quiet:
        print '\nwriting %s' % path_datalib

    datalib.write( path_datalib,
                   tables,
                   randomAccess = False )

    return tables

def __parse_file( path, quiet ):
    # Scan one stat.* file and yield a (label, type, value) triple for every stat found.
    if not quiet:
        print '\rparsing %s' % path,

    regex_label = r'-?[a-zA-Z]+'
    regex_number = r'-?[0-9]+(?:\.[0-9]+)?'
    regex_equals = r'\s*(%s)\s*=\s*(%s)(\s*$|\s+[^0-9])' % (regex_label, regex_number)

    major = None

    def __type(number):
        if '.' in number:
            return 'float'
        else:
            return 'int'

    for line in open( path ):
        line = line.strip()
        if not line:
            # skip blank lines so the field parsing below doesn't trip on them
            continue

        result = re.match( regex_equals, line )
        if result:
            label = result.group(1)
            number = result.group(2)
            type = __type(number)

            if label[0] == '-':
                # a label starting with '-' is a sub-stat of the preceding major label
                label = major + label
            else:
                major = label

            yield label, type, number
        else:
            fields = line.split()
            if fields[0] == 'Domain':
                domain_id = fields[1]
            else:
                result = re.match( r'FP([0-9]+|\*)', fields[0] )
                if result:
                    fp_id = result.group(1)

                    for i in range(1, len(fields)):
                        label = 'Domain[%s]FP[%s][%s]' % (domain_id, fp_id, i-1)
                        number = fields[i]
                        type = __type( number )

                        yield label, type, number
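
# For reference, a hypothetical fragment of the stat.* format that the regexes above
# accept (these values are invented for illustration; only the label names come from
# STAT_TYPES):
#
#     step = 100
#     agents = 92
#     LifeSpan = 203.5
#     Domain 0
#     FP0 12 3 7
#
# A "label = number" line yields one stat; a label starting with '-' is appended to
# the most recent major label; a "Domain N" line followed by "FPk v1 v2 ..." lines
# yields Domain[N]FP[k][i] stats, one per column after the first.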

def __create_tables( path_run, quiet ):
    path = os.path.join( path_run, 'stats', 'stat.1' )

    tables = {}

    for label, type, value in __parse_file( path, quiet ):
        if label == 'step':
            continue

        colnames = ['step', 'value']
        coltypes = ['int', type]

        tables[label] = datalib.Table( label,
                                       colnames,
                                       coltypes,
                                       keycolname = 'step' )

    return tables

def __add_row( tables, path, quiet ):
    step = __path2step( path )

    for label, type, value in __parse_file( path, quiet ):
        if label == 'step':
            assert( int(value) == step )
            continue

        if label not in tables:
            print 'Warning! Found label "%s" in %s, but not in stat.1' % (label, path)
        else:
            table = tables[label]
            row = table.createRow()
            row['step'] = step
            row['value'] = value

def __path2step( path ):
    # stat files are named stat.<step>, so the step number is whatever follows the last '.'
    return int(path[ path.rfind('.') + 1 : ])

def test():
    #for x in __parse_file( '../run_tau60k_from18k_ws200/stats/stat.1' ):
    #    print x

    for tablename, table in datalib.parse( '../run_tau60k_from18k_ws200/stats/datalib.txt' ).items():
        print '---', tablename, table.name, '---'
        for row in table.rows():
            print row['value']
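
A minimal usage sketch, assuming the script is run from the scripts directory, that './run' is a hypothetical Polyworld run directory containing a stats/ subdirectory, and that parse_stats() returns a {table name: Table} dict per run, mirroring what datalib.parse() returns in test() above:

    import common_stats

    # Convert the run's stat.* files to datalib format (if needed) and load them,
    # keyed by run directory rather than by the datalib file path.
    tables = common_stats.parse_stats( ['./run'],
                                       types = ['agents', 'food'],
                                       run_as_key = True )

    for run_dir, run_tables in tables.items():
        for name, table in run_tables.items():
            print '---', run_dir, name, '---'
            for row in table.rows():
                print row['step'], row['value']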