PageRenderTime 51ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/scripts/baicai/b_bantang.py

https://gitlab.com/liningpifu/wishing
Python | 183 lines | 164 code | 18 blank | 1 comment | 27 complexity | 9b48bd6f912cd0b54b3a49017e476df6 MD5 | raw file
  1. #coding:utf-8
  2. import tornado
  3. from tornado import template, httpclient
  4. import hashlib, urllib
  5. import sqlalchemy, models
  6. from sqlalchemy.sql import and_, or_, not_
  7. from mixin import staff_user
  8. from decimal import Decimal
  9. import settings
  10. import re, logging, random
  11. import time,datetime
  12. from datetime import datetime, timedelta
  13. import json
  14. from uuid import uuid1
  15. from celerytask import tasks as async_tasks
  16. from contrib import alimama
  17. CURRENT_SOURCE = 2
  18. IOS_UA = 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Mobile/11D201'
  19. CATE_MAP = {
  20. "taojujia": 1,
  21. "taomeishi": 6,
  22. "taodianqi": 14,
  23. "taomeizhuang": 5,
  24. "taoqita": 8,
  25. "taojujia": 4
  26. }
  27. MAP_CATE = {v: k for k,v in CATE_MAP.items()}
  28. def random_datetime():
  29. now = datetime.now()
  30. hour = now.hour + 2 if (now.hour + 2) < 22 else now.hour
  31. new_dt = datetime(year = now.year, month = now.month, day = now.day, hour = hour, minute = random.randint(1,59), second = random.randint(1, 59))
  32. return new_dt
  33. def fetch_url(url, callback, method = 'GET', data = {}, headers = {}, follow_redirects = True, use_proxy=False):
  34. httpclient.AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
  35. http_client = httpclient.AsyncHTTPClient()
  36. if data:
  37. body = urllib.urlencode(data)
  38. else:
  39. body = None
  40. if not use_proxy:
  41. req = httpclient.HTTPRequest(url, method = method, connect_timeout = 2.0, request_timeout = 5.0, body = body, headers = headers, follow_redirects = follow_redirects)
  42. else:
  43. req = httpclient.HTTPRequest(url, method = method, connect_timeout = 2.0, request_timeout = 5.0, body = body, headers = headers, follow_redirects = follow_redirects, proxy_host = '106.187.52.236', proxy_port=8080)
  44. try:
  45. http_client.fetch(req, callback)
  46. except httpclient.HTTPError,e:
  47. if e.code == 599:
  48. logging.log('request timeout|%s' % url )
  49. return callback(None)
  50. def crawl_list(url):
  51. fetch_url(url, _callback_crawl_list, headers = {'User-Agent': IOS_UA, 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}, use_proxy = False)
  52. def _callback_crawl_list(resp):
  53. if resp.code < 400:
  54. data = json.loads(resp.body)
  55. list_data = data["data"]["topic"]
  56. for topic in list_data:
  57. list_url = 'http://open3.bantangapp.com/topic/info?app_installtime=1436362830.922099&app_versions=4.1&channel_name=appStore&client_id=bt_app_ios&client_secret=9c1e6634ce1c5098e056628cd66a17a5&id=$cate&oauth_token=13248832ebfb692ab92d7a60d7051da0&os_versions=8.4.1&screensize=1242&statistics_uv=1&track_device_info=iPhone&track_deviceid=98D698CD-874B-40AF-919E-B346ECA78A47&track_user_id=1503&v=6'
  58. list_url = list_url.replace("$cate", str(topic["id"]))
  59. do_crawl(list_url)
  60. def do_crawl(url):
  61. print url
  62. fetch_url(url, _callback_crawl, headers = {'User-Agent': IOS_UA, 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}, use_proxy = False)
  63. def _callback_crawl(resp):
  64. if resp.code < 400:
  65. zhe_data = json.loads(resp.body)
  66. print 'bantang success'
  67. for good in zhe_data['data']["product"]:
  68. save_item(good)
  69. else:
  70. print 'ERROR:', resp.code
  71. def add_to_topic(deal, url_name, session):
  72. topic_id = 1 if url_name == 'baoyou' else 2
  73. if deal.id == 0:print 'deal id is 0--------------'
  74. if topic_id == 1 and deal.now_price > 9.9:
  75. return
  76. if topic_id == 2 and deal.now_price > 20:
  77. return
  78. if not deal.is_onsale:
  79. return
  80. if url_name == 'baoyou' or url_name == 'fengding':
  81. many2many_deal = session.query(models.DealInTopic).filter_by(topic_id=topic_id, deal_id = deal.id).first()
  82. if not many2many_deal:
  83. many2many_deal = models.DealInTopic()
  84. many2many_deal.topic_id = topic_id
  85. many2many_deal.deal_id = deal.id
  86. if not many2many_deal.id:
  87. session.add(many2many_deal)
  88. session.commit()
  89. print 'add to topic success'
  90. taobao_union = alimama.TaobaoUnion('zhangxiaolei1982','zxl234567')
  91. FROM_TAOBAO_COUNT = 0
  92. def save_item(good):
  93. session = models.DefaultSession()
  94. deal = models.DealItem()
  95. deal.title = good['title'].replace(u'?~@~P?~J?~W??~J?~V??~@~Q', '').encode('utf-8').replace('?~@~P?~J?~W??~J?~V??~@~Q', '')
  96. cate_url_name = MAP_CATE.get(int(good["category"]))
  97. if not cate_url_name:
  98. cate_url_name = "taojujia"
  99. deal.cate_id = models.StaticCategory_Dict.get(cate_url_name).get('id')
  100. deal.pic_url = good['pic'][0]['pic']
  101. price = good['price'].replace(u'?~E~C', '')
  102. deal.now_price = Decimal(price)
  103. deal.origin_price = Decimal(price)
  104. deal.begin_time = datetime.now()
  105. deal.expire_time = datetime.now() + timedelta(days = 3)
  106. deal.source = CURRENT_SOURCE
  107. deal.is_onsale = False
  108. deal.date_created = random_datetime()
  109. def _get_et(res_data):
  110. global FROM_TAOBAO_COUNT
  111. num_iid = res_data["item_id"]
  112. print 'num_iid', num_iid
  113. deal_ol = session.query(models.DealItem).filter_by(num_iid = num_iid).first()
  114. if not deal_ol:
  115. deal.num_iid = num_iid
  116. try:
  117. deal.taoke_url = taobao_union.convert_url(num_iid)
  118. except alimama.LoginErrorException:
  119. pass
  120. if deal.taoke_url:
  121. deal.is_onsale = True
  122. if not deal_ol:
  123. session.add(deal)
  124. session.commit()
  125. add_to_topic(deal, cate_url_name, session)
  126. else:
  127. deal_ol.now_price = deal.now_price
  128. deal_ol.origin_price = deal.origin_price
  129. deal_ol.expire_time = deal.expire_time
  130. deal_ol.begin_time = deal.begin_time
  131. deal_ol.state = 0
  132. if deal_ol.is_onsale == False:
  133. deal_ol.taoke_url = taobao_union.convert_url(num_iid)
  134. if deal_ol.taoke_url:
  135. deal_ol.is_onsale = True
  136. #session.commit()
  137. else:
  138. print '已?~X?~\?', num_iid
  139. add_to_topic(deal_ol, cate_url_name, session)
  140. async_tasks.promotion_detail.apply_async(args=[num_iid], kwargs={}, timeout=30, soft_timeout=10) #?~B步?~J~S?~O~V该?~U~F?~S~A?~Z~D?~J~X?~I??~A?
  141. session.commit()
  142. session.close()
  143. if good['item_id']:
  144. _get_et(good)
  145. def main():
  146. delta = 60 * 60 * 24
  147. main_url = 'http://open3.bantangapp.com/topic/list?app_installtime=1436362830.922099&app_versions=4.1&category=$cate&channel_name=appStore&client_id=bt_app_ios&client_secret=9c1e6634ce1c5098e056628cd66a17a5&oauth_token=13248832ebfb692ab92d7a60d7051da0&os_versions=8.4.1&page=0&pagesize=20&screensize=1242&track_device_info=iPhone&track_deviceid=98D698CD-874B-40AF-919E-B346ECA78A47&track_user_id=1503&v=6'
  148. taobao_union.login()
  149. for url_name, cate_id in CATE_MAP.items():
  150. #if cate['url_name'] != 'fengding' and cate['url_name'] != 'baoyou':
  151. # continue
  152. if url_name == 'all':
  153. continue
  154. print url_name
  155. target_url = main_url.replace('$cate', str(cate_id))
  156. crawl_list(target_url)
  157. tornado.ioloop.IOLoop.instance().add_timeout(timedelta(seconds = delta), main)
  158. if __name__ == '__main__':
  159. main()
  160. tornado.ioloop.IOLoop.instance().start()