/java/projects/ImageCropper/pycropper/sampler.py

https://github.com/bashwork/common · Python · 34 lines · 28 code · 3 blank · 3 comment · 5 complexity · 07e7bb55650bc209a634b9cdd027c3da MD5 · raw file

  1. import os
  2. import random
  3. import urllib
  4. from cStringIO import StringIO
  5. import pandas as pd
  6. from PIL import Image
  7. from joblib import Parallel, delayed
  8. #
  9. # TODO joblib
  10. #
  11. size = 0.25
  12. output = 'validate'
  13. images = pd.read_csv('image-test-data.csv')
  14. images = images.URL.tolist()
  15. count = len(os.listdir(output))
  16. needed = 1000
  17. while count < needed:
  18. try:
  19. url = random.choice(images)
  20. path = url.split('?')[0].rsplit('/', 1)[-1]
  21. path = os.path.join(output, path)
  22. if os.path.exists(path):
  23. print "exists: " + path
  24. continue
  25. print "downloading: " + path
  26. data = urllib.urlopen(url).read()
  27. image = Image.open(StringIO(data))
  28. image = image.resize([int(s * size) for s in image.size])
  29. image.save(path)
  30. count += 1
  31. except ex: print ex.message