PageRenderTime 43ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/java/projects/ImageCropper/pycropper/sampler.py

https://github.com/bashwork/common
Python | 34 lines | 28 code | 3 blank | 3 comment | 5 complexity | 07e7bb55650bc209a634b9cdd027c3da MD5 | raw file
Possible License(s): GPL-2.0
  1. import os
  2. import random
  3. import urllib
  4. from cStringIO import StringIO
  5. import pandas as pd
  6. from PIL import Image
  7. from joblib import Parallel, delayed
  8. #
  9. # TODO joblib
  10. #
  11. size = 0.25
  12. output = 'validate'
  13. images = pd.read_csv('image-test-data.csv')
  14. images = images.URL.tolist()
  15. count = len(os.listdir(output))
  16. needed = 1000
  17. while count < needed:
  18. try:
  19. url = random.choice(images)
  20. path = url.split('?')[0].rsplit('/', 1)[-1]
  21. path = os.path.join(output, path)
  22. if os.path.exists(path):
  23. print "exists: " + path
  24. continue
  25. print "downloading: " + path
  26. data = urllib.urlopen(url).read()
  27. image = Image.open(StringIO(data))
  28. image = image.resize([int(s * size) for s in image.size])
  29. image.save(path)
  30. count += 1
  31. except ex: print ex.message