PageRenderTime 27ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/mrjob/fs/local.py

https://github.com/Roguelazer/mrjob
Python | 86 lines | 55 code | 15 blank | 16 comment | 16 complexity | 88e2084e283fac64e21adc910a18c8fa MD5 | raw file
  1. # Copyright 2009-2012 Yelp and Contributors
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. #
  5. # http://www.apache.org/licenses/LICENSE-2.0
  6. #
  7. # Unless required by applicable law or agreed to in writing, software
  8. # distributed under the License is distributed on an "AS IS" BASIS,
  9. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. # See the License for the specific language governing permissions and
  11. # limitations under the License.
  12. import glob
  13. import hashlib
  14. import logging
  15. import os
  16. import shutil
  17. from mrjob.fs.base import Filesystem
  18. from mrjob.parse import is_uri
  19. from mrjob.util import read_file
  20. log = logging.getLogger(__name__)
  21. class LocalFilesystem(Filesystem):
  22. """Filesystem for local files. Typically you will get one of these via
  23. ``MRJobRunner().fs``.
  24. """
  25. def can_handle_path(self, path):
  26. return not is_uri(path)
  27. def du(self, path_glob):
  28. return sum(os.path.getsize(path) for path in self.ls(path_glob))
  29. def ls(self, path_glob):
  30. for path in glob.glob(path_glob):
  31. if os.path.isdir(path):
  32. for dirname, _, filenames in os.walk(path):
  33. for filename in filenames:
  34. yield os.path.join(dirname, filename)
  35. else:
  36. yield path
  37. def _cat_file(self, filename):
  38. return read_file(filename)
  39. def mkdir(self, path):
  40. if not os.path.isdir(path):
  41. os.makedirs(path)
  42. def path_exists(self, path_glob):
  43. return bool(glob.glob(path_glob))
  44. def path_join(self, dirname, filename):
  45. """Join a directory name and filename."""
  46. return os.path.join(dirname, filename)
  47. def rm(self, path_glob):
  48. for path in glob.glob(path_glob):
  49. if os.path.isdir(path):
  50. log.debug('Recursively deleting %s' % path)
  51. shutil.rmtree(path)
  52. else:
  53. log.debug('Deleting %s' % path)
  54. os.remove(path)
  55. def touchz(self, path):
  56. if os.path.isfile(path) and os.path.getsize(path) != 0:
  57. raise OSError('Non-empty file %r already exists!' % (path,))
  58. # zero out the file
  59. with open(path, 'w'):
  60. pass
  61. def _md5sum_file(self, fileobj, block_size=(512 ** 2)): # 256K default
  62. md5 = hashlib.md5()
  63. while True:
  64. data = fileobj.read(block_size)
  65. if not data:
  66. break
  67. md5.update(data)
  68. return md5.hexdigest()
  69. def md5sum(self, path):
  70. with open(path, 'rb') as f:
  71. return self._md5sum_file(f)