/src/tools/scripts/post_pgrc.py

https://github.com/sslab-gatech/mosaic · Python · 171 lines · 127 code · 35 blank · 9 comment · 22 complexity · 1a80ee836bc1b92a25b5ea98de46f55d MD5 · raw file

  1. #!/usr/bin/env python3
  2. import os
  3. import sys
  4. import glob
  5. import struct
  6. import optparse
  7. import math
  8. import random
  9. import utils
  10. from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
  11. import config_engine as conf
  12. tile_filename = "tiles.dat"
  13. meta_filename = "meta.dat"
  14. stat_filename = "tile_stats.dat"
  15. tile_prefix = "eb-"
  16. meta_prefix = "ebi-"
  17. stat_prefix = "ebs-"
  18. random_state = random.getstate()
  19. if os.getcwd() != os.path.dirname(__file__):
  20. print("Not confident this code is working outside the /scripts dir")
  21. exit(1)
  22. def fix_block_id(tgt_file, block_id):
  23. with open(tgt_file, 'r+b') as f:
  24. # uintn64_t block_id
  25. f.write(struct.pack('@Q', block_id))
  26. def reshuffle_files(original_paths, output_paths, prefix, output_file_name,
  27. truncate, alignment, file_truncate, file_truncate_size, note=""):
  28. def _dump_per_dir(output_path, output_file_name,
  29. out_dir, truncate, alignment, file_truncate, file_truncate_size):
  30. output_file = os.path.join(output_path, output_file_name)
  31. for single_file in out_dir:
  32. file_content = ""
  33. with open(single_file, "rb") as f:
  34. file_content = f.read()
  35. if truncate:
  36. aligned_size = alignment - (len(file_content) % alignment)
  37. if aligned_size % alignment != 0:
  38. file_content += b'\0' * aligned_size
  39. with open(output_file, "ab") as f:
  40. f.write(file_content)
  41. if file_truncate:
  42. append_bytes = b'\0' * file_truncate_size
  43. with open(output_file, "ab") as f:
  44. f.write(append_bytes)
  45. os.sync()
  46. count_output = len(output_paths)
  47. filename_to_file = dict()
  48. files_per_output = list()
  49. for i in range(0, count_output):
  50. files_per_output.append(list())
  51. files = list()
  52. for original_path in original_paths:
  53. prefix_pattern = "*/" + prefix + "*"
  54. partial_files = glob.glob(os.path.join(original_path, prefix_pattern))
  55. for single_file in partial_files:
  56. filename = os.path.basename(single_file)
  57. filename_to_file[filename] = single_file
  58. files.append(filename)
  59. sorted_files = sorted(files)
  60. current_output = 0
  61. for single_file in sorted_files:
  62. files_per_output[current_output].append(filename_to_file[single_file])
  63. current_output = (current_output + 1) % count_output
  64. dir_index = 0
  65. rfutures = []
  66. with ThreadPoolExecutor(max_workers=len(output_paths)) as shuffle_exec:
  67. for output_path in output_paths:
  68. rfutures.append(shuffle_exec.submit(_dump_per_dir,
  69. output_path, output_file_name,
  70. files_per_output[dir_index],
  71. truncate, alignment,
  72. file_truncate,
  73. file_truncate_size))
  74. #_dump_per_dir(output_path, output_file_name, files_per_output[dir_index], truncate, alignment)
  75. dir_index += 1
  76. for f in as_completed(rfutures):
  77. pass
  78. return ("done with %s" % note)
  79. def copy_global_stats_file(global_dir, meta_dirs):
  80. stat_filename = os.path.join(global_dir, "stat.dat")
  81. for meta_dir in meta_dirs:
  82. cmd = "cp %s %s" % (stat_filename, meta_dir)
  83. os.system(cmd)
  84. def setup_directories(output_meta_dirs, output_tile_dirs):
  85. for output_meta_dir in output_meta_dirs:
  86. utils.mkdirp(output_meta_dir, conf.FILE_GROUP)
  87. for output_tile_dir in output_tile_dirs:
  88. utils.mkdirp(output_tile_dir, conf.FILE_GROUP)
  89. def post_graph_load(original_meta_dirs, output_meta_dirs,
  90. original_tile_dirs, output_tile_dirs,
  91. globals_dir, shuffle):
  92. random.seed(1)
  93. random_state = random.getstate()
  94. setup_directories(output_meta_dirs, output_tile_dirs)
  95. futures = []
  96. with ProcessPoolExecutor(max_workers=(3*len(output_tile_dirs))) as executor:
  97. #print("# Rearranging tile-files...")
  98. #reshuffle_files(original_tile_dirs, output_tile_dirs, tile_prefix, tile_filename, True, 4096)
  99. futures.append(executor.submit(reshuffle_files,
  100. original_tile_dirs, output_tile_dirs,
  101. tile_prefix, tile_filename, True, 4096, True, 1024 * 1024,
  102. "tile-files"))
  103. #print("# Rearranging meta-files...")
  104. #reshuffle_files(original_meta_dirs, output_meta_dirs, meta_prefix, meta_filename, True, 4096)
  105. futures.append(executor.submit(reshuffle_files,
  106. original_meta_dirs, output_meta_dirs,
  107. meta_prefix, meta_filename, True, 4096, True, 1024 * 1024,
  108. "meta-files"))
  109. #print("# Rearranging stat-files...")
  110. #reshuffle_files(original_meta_dirs, output_meta_dirs, stat_prefix, stat_filename, False, 0)
  111. futures.append(executor.submit(reshuffle_files,
  112. original_meta_dirs, output_meta_dirs,
  113. stat_prefix, stat_filename, False, 0, False, 0,
  114. "stat-files"))
  115. for f in as_completed(futures):
  116. print (f.result())
  117. copy_global_stats_file(globals_dir, output_meta_dirs)
  118. if __name__ == "__main__":
  119. parser = optparse.OptionParser()
  120. parser.add_option("--globals-dir", help="path to global meta data")
  121. parser.add_option("--original-meta", help="paths to tile meta data separated by ':'")
  122. parser.add_option("--output-meta", help="paths to tile meta data separated by ':'")
  123. parser.add_option("--original-tile", help="paths to tile edge data separated by ':'")
  124. parser.add_option("--output-tile", help="paths to tile edge data separated by ':'")
  125. parser.add_option("--partition", help="paths to partition data, seperated by ':'")
  126. parser.add_option("--shuffle", action="store_true", default = False)
  127. (opts, args) = parser.parse_args()
  128. shuffle = opts.shuffle
  129. original_meta_dirs = opts.original_meta.split(':')
  130. output_meta_dirs = opts.output_meta.split(':')
  131. original_tile_dirs = opts.original_tile.split(':')
  132. output_tile_dirs = opts.output_tile.split(':')
  133. print("# Rearrange meta, tile and stat-files...")
  134. post_graph_load(original_meta_dirs, output_meta_dirs,
  135. original_tile_dirs, output_tile_dirs,
  136. shuffle)