PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/subtitle-downloader.py

https://gitlab.com/132nd-etcher/subtitle-downloader
Python | 124 lines | 104 code | 6 blank | 14 comment | 8 complexity | 99fc1f2312a0d0d5732b5211f301f31c MD5 | raw file
  1. #!/usr/bin/env python
  2. #-------------------------------------------------------------------------------
  3. # Name : subtitle downloader
  4. # Purpose : One step subtitle download
  5. #
  6. # Authors : manoj m j, arun shivaram p, Valentin Vetter, niroyb
  7. # Edited by : Valentin Vetter
  8. # Created :
  9. # Copyright : (c) www.manojmj.com
  10. # Licence : GPL v3
  11. #-------------------------------------------------------------------------------
  12. # TODO: use another DB if subs are not found on subDB
  13. import hashlib
  14. import os
  15. import sys
  16. import logging
  17. import requests,time,re,zipfile
  18. from bs4 import BeautifulSoup
  19. PY_VERSION = sys.version_info[0]
  20. if PY_VERSION == 2:
  21. import urllib2
  22. if PY_VERSION == 3:
  23. import urllib.request
  24. def get_hash(file_path):
  25. read_size = 64 * 1024
  26. with open(file_path, 'rb') as f:
  27. data = f.read(read_size)
  28. f.seek(-read_size, os.SEEK_END)
  29. data += f.read(read_size)
  30. return hashlib.md5(data).hexdigest()
  31. def sub_downloader(file_path):
  32. # Put the code in a try catch block in order to continue for other video files, if it fails during execution
  33. try:
  34. # Skip this file if it is not a video
  35. root, extension = os.path.splitext(file_path)
  36. if extension not in [".avi", ".mp4", ".mkv", ".mpg", ".mpeg", ".mov", ".rm", ".vob", ".wmv", ".flv", ".3gp",".3g2"]:
  37. return
  38. if not os.path.exists(root + ".srt"):
  39. headers = {'User-Agent': 'SubDB/1.0 (subtitle-downloader/1.0; http://github.com/manojmj92/subtitle-downloader)'}
  40. url = "http://api.thesubdb.com/?action=download&hash=" + get_hash(file_path) + "&language=en"
  41. if PY_VERSION == 3:
  42. req = urllib.request.Request(url, None, headers)
  43. response = urllib.request.urlopen(req).read()
  44. if PY_VERSION == 2:
  45. req = urllib2.Request(url, '', headers)
  46. response = urllib2.urlopen(req).read()
  47. with open(root + ".srt", "wb") as subtitle:
  48. subtitle.write(response)
  49. logging.info("Subtitle successfully downloaded for " + file_path)
  50. except:
  51. #download subs from subscene if not found in subdb
  52. sub_downloader2(file_path)
  53. def sub_downloader2(file_path):
  54. try:
  55. root, extension = os.path.splitext(file_path)
  56. if extension not in [".avi", ".mp4", ".mkv", ".mpg", ".mpeg", ".mov", ".rm", ".vob", ".wmv", ".flv", ".3gp",".3g2"]:
  57. return
  58. if os.path.exists(root + ".srt"):
  59. return
  60. j=-1
  61. root2=root
  62. for i in range(0,len(root)):
  63. if(root[i]=="\\"):
  64. j=i
  65. root=root2[j+1:]
  66. root2=root2[:j+1]
  67. r=requests.get("http://subscene.com/subtitles/release?q="+root);
  68. soup=BeautifulSoup(r.content,"lxml")
  69. atags=soup.find_all("a")
  70. href=""
  71. for i in range(0,len(atags)):
  72. spans=atags[i].find_all("span")
  73. if(len(spans)==2 and spans[0].get_text().strip()=="English"):
  74. href=atags[i].get("href").strip()
  75. if(len(href)>0):
  76. r=requests.get("http://subscene.com"+href);
  77. soup=BeautifulSoup(r.content,"lxml")
  78. lin=soup.find_all('a',attrs={'id':'downloadButton'})[0].get("href")
  79. r=requests.get("http://subscene.com"+lin);
  80. soup=BeautifulSoup(r.content,"lxml")
  81. subfile=open(root2+".zip", 'wb')
  82. for chunk in r.iter_content(100000):
  83. subfile.write(chunk)
  84. subfile.close()
  85. time.sleep(1)
  86. zip=zipfile.ZipFile(root2+".zip")
  87. zip.extractall(root2)
  88. zip.close()
  89. os.unlink(root2+".zip")
  90. except:
  91. #Ignore exception and continue
  92. print("Error in fetching subtitle for " + file_path)
  93. print("Error", sys.exc_info())
  94. logging.error("Error in fetching subtitle for " + file_path + str(sys.exc_info()))
  95. def main():
  96. root, _ = os.path.splitext(sys.argv[0])
  97. logging.basicConfig(filename=root + '.log', level=logging.INFO)
  98. logging.info("Started with params " + str(sys.argv))
  99. if len(sys.argv) == 1:
  100. print("This program requires at least one parameter")
  101. sys.exit(1)
  102. for path in sys.argv:
  103. if os.path.isdir(path):
  104. # Iterate the root directory recursively using os.walk and for each video file present get the subtitle
  105. for dir_path, _, file_names in os.walk(path):
  106. for filename in file_names:
  107. file_path = os.path.join(dir_path, filename)
  108. sub_downloader(file_path)
  109. else:
  110. sub_downloader(path)
  111. if __name__ == '__main__':
  112. main()