PageRenderTime 37ms CodeModel.GetById 31ms RepoModel.GetById 1ms app.codeStats 0ms

/py/geturl.py

https://github.com/yangsong/projects
Python | 74 lines | 65 code | 4 blank | 5 comment | 2 complexity | 7d54519bb9bbe2942e10233954e8717e MD5 | raw file
  1. #!/usr/bin/env python
  2. # -*- coding: UTF-8 -*-
  3. #Author: alvayang <alvayang@tabex.org>
  4. #Last Change:
  5. #Description:
  6. import sys
  7. import os
  8. import traceback
  9. import urllib2
  10. import urllib
  11. import libxml2
  12. try:
  13. import chardet
  14. except:
  15. pass
  16. urls = '/geturl'
  17. def get_resource(title, url):
  18. try:
  19. opener = urllib2.build_opener()
  20. opener.addHeaders = [
  21. ('Host', 'mp3.sogou.com'),
  22. ('User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3'),
  23. ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
  24. ('Accept-Language', 'zh-cn,en-us;q=0.7,en;q=0.3'),
  25. ('Accept-Encoding', 'gzip,deflate'),
  26. ('Accept-Charset', 'gb18030,utf-8;q=0.7,*;q=0.7'),
  27. ('Keep-Alive', '115'),
  28. ('Connection', 'keep-alive'),
  29. ('Referer', 'http://mp3.sogou.com/')]
  30. ureq = urllib2.Request(url)
  31. urllib2.install_opener(opener)
  32. f = opener.open(ureq)
  33. buf = ''
  34. z = f.read(4096)
  35. while z:
  36. buf += z
  37. z = f.read(4096)
  38. # 解析XML
  39. data = buf.decode('gb2312')
  40. parse_xml(title, buf)
  41. except:
  42. #print traceback.format_exc()
  43. sys.exit()
  44. def parse_xml(title, text):
  45. encoding = 'gbk'
  46. options = libxml2.HTML_PARSE_RECOVER + libxml2.HTML_PARSE_NOWARNING + libxml2.HTML_PARSE_NOERROR
  47. doc = libxml2.readDoc(text, None, encoding, options).doc
  48. ctxt = doc.xpathNewContext()
  49. items = ctxt.xpathEval(u'//downloadList')
  50. out = []
  51. ret = {}
  52. want = [u'size', u'urls', u'urlsource']
  53. if items:
  54. for z in items:
  55. for q in want:
  56. if q == "urls":
  57. durl = (z.xpathEval(q)[0].get_content()).decode('utf-8') if z.xpathEval(q) else u'没有合理的解释'
  58. newfilename = (urllib.unquote_plus(title.decode('utf-8').strip()) + "." + (durl.split(".")[-1]))
  59. print "Downloading:" + newfilename
  60. #newfilename = newfilename.decode('utf-8', 'ignore')
  61. #newfilename = newfilename.encode('utf-8', 'replace')
  62. cmd = "axel " + " \"" + durl + "\" --output=\"/opt/music/" + newfilename.replace(" ", "") + "\" && mplayer /opt/music/" + newfilename.replace(" ", "")
  63. #cmd = "cd /opt/music && curl " + " \"" + durl + "\" -o \"" + newfilename + "\" && cd -"
  64. try:
  65. print cmd
  66. os.system(cmd.encode('utf-8'))
  67. return
  68. except:
  69. print traceback.format_exc()