PageRenderTime 41ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src/tools/gaiafusion.py

https://github.com/paucm/gaia
Python | 137 lines | 77 code | 34 blank | 26 comment | 22 complexity | 63973e18e79ca0879fb899f96ac83d3a MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-3.0, LGPL-2.0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Copyright (C) 2006-2013 Music Technology Group - Universitat Pompeu Fabra
  4. #
  5. # This file is part of Gaia
  6. #
  7. # Gaia is free software: you can redistribute it and/or modify it under
  8. # the terms of the GNU Affero General Public License as published by the Free
  9. # Software Foundation (FSF), either version 3 of the License, or (at your
  10. # option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful, but WITHOUT
  13. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  14. # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  15. # details.
  16. #
  17. # You should have received a copy of the Affero GNU General Public License
  18. # version 3 along with this program. If not, see http://www.gnu.org/licenses/
  19. from gaia2.fusion import transformDataSet, mergeChunk, mergeAll, mergeDirectory
  20. from optparse import OptionParser, OptionGroup
  21. from os.path import isdir, isfile
  22. import sys, gaia2
  23. def createOptionParser():
  24. parser = OptionParser(usage = 'usage: %prog [options] input_sigfiles output_db\n\n' +
  25. 'where input_sigfiles can be either a yaml file containing the mapping ids -> sigfiles,\n' +
  26. 'or a directory containing them (in which case the ids will be guessed from the filenames)\n\n' +
  27. 'input_sigfiles and output_db can be specified either as keyword options or as positional arguments.')
  28. parser.add_option("-y", "--yamllist", dest="yamllist",
  29. help="the Yaml file containing the list of files to be merged")
  30. parser.add_option("-d", "--directory", dest="directory",
  31. help="the directory containing all the sigfiles to be merged")
  32. parser.add_option("-o", "--output", dest="outputFile",
  33. help="the filename of the output dataset")
  34. parser.add_option("-c", "--chunksize", dest="chunkSize", default = 30000,
  35. help="the size of the chunks to be used for merging")
  36. parser.add_option("-t", "--transfofile", dest="transfoFile",
  37. help="the file containing the original transformations to be applied, in yaml format")
  38. parser.add_option("-s", "--select", dest="select",
  39. help="the descriptors to be included when initally loading the points")
  40. parser.add_option("-e", "--exclude", dest="exclude",
  41. help="the descriptors to be excluded when initally loading the points")
  42. advancedGroup = OptionGroup(parser, 'Advanced Options',
  43. 'Note: use these options only if you know what you\'re doing. You\'ve been warned...')
  44. advancedGroup.add_option("-r", "--transform",
  45. action="store_true", dest="transform", default=False,
  46. help="apply basic transformations to dataset instead of merging it")
  47. advancedGroup.add_option("-i", "--input", dest="inputFile",
  48. help="the filename of the input dataset. Only valid when transforming datasets")
  49. parser.add_option_group(advancedGroup)
  50. return parser
  51. def usage():
  52. '''gaiafusion --yamllist=mylist.yaml --output=data/itunes.db
  53. gaiafusion -d essentia_1.0.6/sigfiles -o amazon.db
  54. gaiafusion --chunksize=10000 ...
  55. '''
  56. createOptionParser().print_help()
  57. sys.exit(1)
  58. def fusion():
  59. options, args = createOptionParser().parse_args()
  60. # dispatch the positional args into their named equivalents
  61. try:
  62. if options.transform:
  63. if not options.inputFile:
  64. options.inputFile, args = args[0], args[1:]
  65. if not options.outputFile:
  66. options.outputFile, args = args[0], args[1:]
  67. else: # no transform, just merge
  68. if not options.yamllist and not options.directory:
  69. path, args = args[0], args[1:]
  70. if isfile(path):
  71. options.yamllist = path
  72. elif isdir(path):
  73. options.directory = path
  74. else:
  75. raise IOError('File/dir not found: %s' % path)
  76. if not options.outputFile:
  77. options.outputFile, args = args[0], args[1:]
  78. # if there are any remaining args, then we have a problem...
  79. if args:
  80. usage()
  81. except IndexError:
  82. usage()
  83. # TODO: validate parameters
  84. # dispatch request
  85. if options.transform:
  86. transformDataSet(options.inputFile, options.outputFile, options.transfoFile)
  87. else:
  88. select, exclude = None, None
  89. if options.select:
  90. select = options.select.split(',')
  91. if options.exclude:
  92. exclude = options.exclude.split(',')
  93. if options.yamllist:
  94. mergeAll(options.yamllist, options.outputFile, int(options.chunkSize), options.transfoFile, select, exclude)
  95. elif options.directory:
  96. if not isdir(options.directory):
  97. print 'ERROR: %s is not a valid directory' % options.directory
  98. sys.exit(1)
  99. mergeDirectory(options.directory, options.outputFile, int(options.chunkSize), options.transfoFile, select, exclude)
  100. else:
  101. usage()
  102. if __name__ == '__main__':
  103. fusion()