PageRenderTime 66ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/crunchy-xml-decoder/ultimate.py

https://gitlab.com/eientei95/crunchy-xml-decoder
Python | 351 lines | 316 code | 17 blank | 18 comment | 21 complexity | 89ac810184f74cc37e6465fe9753fc02 MD5 | raw file
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Crunchyroll Export Script DX - Last Updated 2014/07/16
  5. Removes need for rtmpExplorer
  6. ORIGINAL SOURCE:
  7. http://www.darkztar.com/forum/showthread.php?219034-Ripping-videos-amp-subtitles-from-Crunchyroll-%28noob-friendly%29
  8. """
  9. # import lxml
  10. import os
  11. import re
  12. import shutil
  13. import subprocess
  14. import sys
  15. #import HTMLParser
  16. import altfuncs
  17. from bs4 import BeautifulSoup
  18. from crunchyDec import CrunchyDec
  19. from unidecode import unidecode
  20. # ----------
  21. #onlymainsub=False
  22. def video():
  23. print 'Downloading video...'
  24. cmd = ['.\\video-engine\\rtmpdump',
  25. '-r', url1, '-a', url2,
  26. '-f', 'WIN 11,8,800,50',
  27. '-m', '15',
  28. '-W', 'http://static.ak.crunchyroll.com/versioned_assets/ChromelessPlayerApp.17821a0e.swf',
  29. '-p', page_url2,
  30. '-y', filen,
  31. '-o', '.\\export\\{}.flv'.format(title)]
  32. error = subprocess.call(cmd)
  33. # error = 0
  34. num = 1
  35. while error != 0 and num < 4:
  36. if error == 1:
  37. print '\nVideo failed to download, trying again. (' + str(num) + '/3)'
  38. error = subprocess.call(cmd)
  39. num += 1
  40. if error == 2:
  41. print '\nVideo download is incomplete, resuming. (' + str(num) + '/3)'
  42. error = subprocess.call(cmd + ' -e')
  43. num += 1
  44. if error != 0:
  45. print '\nVideo failed to download. Writing error...'
  46. if os.path.exists('error.log'):
  47. log = open('error.log', 'a')
  48. else:
  49. log = open('error.log', 'w')
  50. log.write(page_url2 + '\n')
  51. log.close()
  52. os.remove('.\\' + title + '.flv"')
  53. sys.exit()
  54. # ----------
  55. def subtitles(eptitle):
  56. global sub_id
  57. global sub_id2
  58. global sub_id3
  59. global sub_id4
  60. global sub_id5
  61. global sub_id6
  62. global lang
  63. xmllist = altfuncs.getxml('RpcApiSubtitle_GetListing', media_id)
  64. xmllist = unidecode(xmllist).replace('><', '>\n<')
  65. global hardcoded
  66. if '<media_id>None</media_id>' in xmllist:
  67. print 'The video has hardcoded subtitles.'
  68. hardcoded = True
  69. sub_id = False
  70. else:
  71. try:
  72. sub_id2 = re.findall("id=([0-9]+)", xmllist)
  73. sub_id3 = re.findall("title='(\[.+\]) ", xmllist)
  74. sub_id4 = re.findall("title='(\[.+\]) ", xmllist)
  75. sub_id5 = re.findall("title='(\[.+\]) ", xmllist)
  76. sub_id6 = re.findall("title='(\[.+\]) ", xmllist)
  77. hardcoded = False
  78. # try:
  79. # sub_id = re.findall("id=([0-9]+)' title='\["+re.escape(unidecode(lang1)), xmllist)[0]
  80. # lang = lang1
  81. # except IndexError:
  82. # try:
  83. # sub_id = re.findall("id=([0-9]+)' title='\["+re.escape(unidecode(lang2)), xmllist)[0]
  84. # lang = lang2
  85. except IndexError:
  86. print "The video's subtitles cannot be found, or are region-locked."
  87. hardcoded = True
  88. sub_id = False
  89. try:
  90. sub_id = re.findall("id=([0-9]+)' title='\["+re.escape(unidecode(lang1)), xmllist)[0]
  91. lang = lang1
  92. except IndexError:
  93. try:
  94. sub_id = re.findall("id=([0-9]+)' title='\["+re.escape(unidecode(lang2)), xmllist)[0]
  95. lang = lang2
  96. except IndexError:
  97. lang ='[English (US)]'
  98. sub_id3 = [word.replace('[English (US)]','eng') for word in sub_id3]
  99. sub_id3 = [word.replace('[Deutsch]','deu') for word in sub_id3]
  100. sub_id3 = [word.replace('[Portugues (Brasil)]','por') for word in sub_id3]
  101. sub_id3 = [word.replace('[Francais (France)]','fre') for word in sub_id3]
  102. sub_id3 = [word.replace('[Espanol (Espana)]','spa_spa') for word in sub_id3]
  103. sub_id3 = [word.replace('[Espanol]','spa') for word in sub_id3]
  104. sub_id3 = [word.replace('[Italiano]','ita') for word in sub_id3]
  105. sub_id3 = [word.replace('[l`rby@]','ara') for word in sub_id3]
  106. # sub_id4 = [word.replace('[l`rby@]',u'[العربية]') for word in sub_id4]
  107. sub_id4 = [word.replace('[l`rby@]',u'[Arabic]') for word in sub_id4]
  108. sub_id5 = [word.replace('[English (US)]','eng') for word in sub_id5]
  109. sub_id5 = [word.replace('[Deutsch]','deu') for word in sub_id5]
  110. sub_id5 = [word.replace('[Portugues (Brasil)]','por') for word in sub_id5]
  111. sub_id5 = [word.replace('[Francais (France)]','fre') for word in sub_id5]
  112. sub_id5 = [word.replace('[Espanol (Espana)]','spa_spa') for word in sub_id5]
  113. sub_id5 = [word.replace('[Espanol]','spa') for word in sub_id5]
  114. sub_id5 = [word.replace('[Italiano]','ita') for word in sub_id5]
  115. sub_id5 = [word.replace('[l`rby@]','ara') for word in sub_id5]
  116. # sub_id6 = [word.replace('[l`rby@]',u'[العربية]') for word in sub_id6]
  117. sub_id6 = [word.replace('[l`rby@]',u'[Arabic]') for word in sub_id6]
  118. # else:
  119. # try:
  120. # sub_id = re.findall("id=([0-9]+)' title='\["+re.escape(unidecode(lang1)), xmllist)[0]
  121. # hardcoded = False
  122. # lang = lang1
  123. # except IndexError:
  124. # try:
  125. # sub_id = re.findall("id=([0-9]+)' title='\["+re.escape(unidecode(lang2)), xmllist)[0]
  126. # print 'Language not found, reverting to ' + lang2 + '.'
  127. # hardcoded = False
  128. # lang = lang2
  129. # except IndexError:
  130. # try:
  131. # sub_id = re.findall("id=([0-9]+)' title='\[English", xmllist)[0] # default back to English
  132. # print 'Backup language not found, reverting to English.'
  133. # hardcoded = False
  134. # lang = 'English'
  135. # except IndexError:
  136. # print "The video's subtitles cannot be found, or are region-locked."
  137. # hardcoded = True
  138. # sub_id = False
  139. if not hardcoded:
  140. for i in sub_id2:
  141. #xmlsub = altfuncs.getxml('RpcApiSubtitle_GetXml', sub_id)
  142. xmlsub = altfuncs.getxml('RpcApiSubtitle_GetXml', i)
  143. formattedsubs = CrunchyDec().returnsubs(xmlsub)
  144. #subfile = open(eptitle + '.ass', 'wb')
  145. subfile = open('.\\export\\'+title+'['+sub_id3.pop(0)+']'+sub_id4.pop(0)+'.ass', 'wb')
  146. subfile.write(formattedsubs.encode('utf-8-sig'))
  147. subfile.close()
  148. #shutil.move(eptitle + '.ass', os.path.join(os.getcwd(), 'export', ''))
  149. # ----------
  150. def ultimate(page_url, seasonnum, epnum):
  151. global url1, url2, filen, title, media_id, lang1, lang2, hardcoded, forceusa, page_url2
  152. #global player_revision
  153. print '''
  154. --------------------------
  155. ---- Start New Export ----
  156. --------------------------
  157. CrunchyRoll Downloader Toolkit DX v0.98
  158. Crunchyroll hasn't changed anything.
  159. If you don't have a premium account, go and sign up for one now. It's well worthit, and supports the animators.
  160. ----------
  161. Booting up...
  162. '''
  163. if page_url == '':
  164. page_url = raw_input('Please enter Crunchyroll video URL:\n')
  165. try:
  166. int(page_url)
  167. page_url = 'http://www.crunchyroll.com/media-' + page_url
  168. except ValueError:
  169. if not page_url.startswith('http://') and not page_url.startswith('https://'):
  170. page_url = 'http://' + page_url
  171. try:
  172. int(page_url[-6:])
  173. except ValueError:
  174. if bool(seasonnum) and bool(epnum):
  175. page_url = altfuncs.vidurl(page_url, seasonnum, epnum)
  176. elif bool(epnum):
  177. page_url = altfuncs.vidurl(page_url, 1, epnum)
  178. else:
  179. page_url = altfuncs.vidurl(page_url, False, False)
  180. subprocess.call('title ' + page_url.replace('http://www.crunchyroll.com/', ''), shell=True)
  181. # ----------
  182. #lang1, lang2 = altfuncs.config()
  183. #lang1, lang2, forcesub = altfuncs.config()
  184. lang1, lang2, forcesub, forceusa, localizecookies, vquality, onlymainsub = altfuncs.config()
  185. #player_revision = altfuncs.playerrev(page_url)
  186. html = altfuncs.gethtml(page_url)
  187. #h = HTMLParser.HTMLParser()
  188. title = re.findall('<title>(.+?)</title>', html)[0].replace('Crunchyroll - Watch ', '')
  189. if len(os.getcwd()+'\\export\\'+title+'.flv') > 255:
  190. title = re.findall('^(.+?) \- ', title)[0]
  191. # title = h.unescape(unidecode(title)).replace('/', ' - ').replace(':', '-').
  192. # replace('?', '.').replace('"', "''").replace('|', '-').replace('&quot;',"''").strip()
  193. ### Taken from http://stackoverflow.com/questions/6116978/python-replace-multiple-strings ###
  194. rep = {' / ': ' - ', '/': ' - ', ':': '-', '?': '.', '"': "''", '|': '-', '&quot;': "''", 'a*G':'a G', '*': '#', u'\u2026': '...'}
  195. rep = dict((re.escape(k), v) for k, v in rep.iteritems())
  196. pattern = re.compile("|".join(rep.keys()))
  197. title = unidecode(pattern.sub(lambda m: rep[re.escape(m.group(0))], title))
  198. ### End stolen code ###
  199. subprocess.call('title ' + title.replace('&', '^&'), shell=True)
  200. # ----------
  201. media_id = page_url[-6:]
  202. xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoPlayer_GetStandardConfig', media_id), 'xml')
  203. try:
  204. if '4' in xmlconfig.find_all('code')[0]:
  205. print xmlconfig.find_all('msg')[0].text
  206. sys.exit()
  207. except IndexError:
  208. pass
  209. vid_id = xmlconfig.find('media_id').string
  210. # ----------
  211. try:
  212. host = xmlconfig.find('host').string
  213. except AttributeError:
  214. print 'Downloading 2 minute preview.'
  215. media_id = xmlconfig.find('media_id').string
  216. xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoEncode_GetStreamInfo', media_id), 'xml')
  217. host = xmlconfig.find('host').string
  218. if re.search('fplive\.net', host):
  219. url1 = re.findall('.+/c[0-9]+', host).pop()
  220. url2 = re.findall('c[0-9]+\?.+', host).pop()
  221. else:
  222. url1 = re.findall('.+/ondemand/', host).pop()
  223. url2 = re.findall('ondemand/.+', host).pop()
  224. filen = xmlconfig.find('file').string
  225. # ----------
  226. if 'subs' in sys.argv:
  227. subtitles(title)
  228. subs_only = True
  229. hardcoded = True # bleh
  230. else:
  231. page_url2 = page_url
  232. video()
  233. #heightp = subprocess.Popen('"video-engine\MediaInfo.exe" --inform=Video;%Height% ".\export\\' + title + '.flv"' ,shell=True , stdout=subprocess.PIPE).stdout.read()
  234. heightp = '360p' if xmlconfig.height.string == '368' else '{0}p'.format(xmlconfig.height.string) # This is less likely to fail
  235. subtitles(title)
  236. subtitlefilecode=''
  237. #shutil.move(title + '.flv', os.path.join(os.getcwd(), 'export', ''))
  238. print 'Starting mkv merge'
  239. if hardcoded:
  240. subprocess.call('"video-engine\mkvmerge.exe" -o ".\export\\' + title + '[' + heightp.strip() +'].mkv" --language 1:jpn -a 1 -d 0 ' +
  241. '".\export\\' + title + '.flv"' +' --title "' + title +'"')
  242. else:
  243. sublang = {u'Español (Espana)': 'spa_spa', u'Français (France)': 'fre', u'Português (Brasil)': 'por',
  244. u'English': 'eng', u'Español': 'spa', u'Türkçe': 'tur', u'Italiano': 'ita',
  245. u'العربية': 'ara', u'Deutsch': 'deu'}[lang]
  246. # defaulttrack = False
  247. #print lang.encode('utf-8')
  248. #print sub_id5
  249. #print sub_id6
  250. for i in sub_id2:
  251. defaultsub=''
  252. sublangc=sub_id5.pop(0)
  253. sublangn=sub_id6.pop(0)
  254. # print forcesub
  255. if not forcesub:
  256. if sublangc == sublang:
  257. defaultsub=' --default-track 0:yes --forced-track 0:no'
  258. else:
  259. defaultsub=' --default-track 0:no --forced-track 0:no'
  260. else:
  261. if sublangc == sublang:
  262. defaultsub=' --default-track 0:yes --forced-track 0:yes'
  263. else:
  264. defaultsub=' --default-track 0:no --forced-track 0:no'
  265. if not onlymainsub:
  266. subtitlefilecode=subtitlefilecode+' --language 0:' + sublangc.replace('spa_spa','spa') + defaultsub +' --track-name 0:"' + sublangn + '" -s 0 ".\export\\'+title+'['+sublangc+']'+sublangn+'.ass"'
  267. else:
  268. if sublangc == sublang:
  269. subtitlefilecode=subtitlefilecode+' --language 0:' + sublangc.replace('spa_spa','spa') + defaultsub +' --track-name 0:"' + sublangn + '" -s 0 ".\export\\'+title+'['+sublangc+']'+sublangn+'.ass"'
  270. # subprocess.call('"video-engine\mkvmerge.exe" -o ".\export\\' + title + '.mkv" --language 1:jpn -a 1 -d 0 ' +
  271. # '".\export\\' + title + '.flv" --language 0:' + sublang + ' -s 0 ".\export\\'+title+'.ass"')
  272. # print '"video-engine\mkvmerge.exe" -o ".\export\\' + title + '.mkv" --language 0:jpn --language 1:jpn -a 1 -d 0 ' + '".\export\\' + title + '.flv"' + subtitlefilecode +' --title "' + title +'"'
  273. mkvcmd='"video-engine\mkvmerge.exe" -o ".\export\\' + title + '[' + heightp.strip() +'].mkv" --language 0:jpn --language 1:jpn -a 1 -d 0 ' + '".\export\\' + title + '.flv"' + subtitlefilecode +' --title "' + title +'"'
  274. # print mkvcmd
  275. #print subtitlefilecode
  276. subprocess.call(mkvcmd)
  277. print 'Merge process complete'
  278. subs_only = False
  279. print
  280. print '----------'
  281. print
  282. print 'Starting Final Cleanup'
  283. if not subs_only:
  284. os.remove(os.path.join(os.getcwd(), 'export', '') + title + '.flv')
  285. if not hardcoded or not subs_only:
  286. #os.remove(os.path.join(os.getcwd(), 'export', '') + title + '.ass')
  287. for root, dirs, files in os.walk('export'):
  288. for file in filter(lambda x: re.match(title +'\[.+\]'+ '.ass', x), files):
  289. os.remove(os.path.join(root, file))
  290. print 'Cleanup Complete'
  291. # ----------
  292. if __name__ == '__main__':
  293. try:
  294. page_url = sys.argv[1]
  295. except IndexError:
  296. page_url = ''
  297. try:
  298. seasonnum, epnum = sys.argv[2:4]
  299. except ValueError:
  300. try:
  301. epnum = str(int(sys.argv[2]))
  302. seasonnum = ''
  303. except IndexError:
  304. # sys.exit('No season or episode numbers.')
  305. seasonnum, epnum = '', ''
  306. pass
  307. ultimate(page_url, seasonnum, epnum)