PageRenderTime 72ms CodeModel.GetById 45ms RepoModel.GetById 0ms app.codeStats 0ms

/yes24_script.py

https://github.com/jangxyz/yes24
Python | 193 lines | 187 code | 4 blank | 2 comment | 2 complexity | b5e6cd7a620c64113328ba9c7cc2287f MD5 | raw file
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import getpass
  4. import urllib, urllib2, cookielib
  5. from BeautifulSoup import BeautifulSoup
  6. from datetime import datetime
  7. import sys, re
  8. import logging
  9. if '-d' in sys.argv:
  10. logging.basicConfig(level=logging.DEBUG)
  11. target_month=datetime.now().strftime("%Y.%m")
  12. default_url = "http://www.yes24.com/"
  13. secure_url = "https://www.yes24.com/"
  14. login_url = "https://www.yes24.com/Templates/FTLogIn.aspx"
  15. order_path = "/Member/FTMyOrderList01.aspx"
  16. order_url = secure_url + order_path
  17. order_detail_url = "https://www.yes24.com/Member/FTMyOrderDtl01.aspx"
  18. login_data = {
  19. "SMemberID" : None,
  20. "SMemberPassword" : None,
  21. "RefererUrl" : "http://www.yes24.com/Main/Default.aspx",
  22. "AutoLogin" : "1",
  23. "LoginIDSave" : "N",
  24. "FBLoginSub:LoginType" : '',
  25. "FBLoginSub:ReturnURL" : '',
  26. "FBLoginSub:ReturnParams": '',
  27. }
  28. def authorize(username, password):
  29. cj = cookielib.CookieJar()
  30. opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
  31. login_data['SMemberID'] = username
  32. login_data['SMemberPassword'] = password
  33. resp = opener.open(login_url, urllib.urlencode(login_data))
  34. html = resp.read()
  35. if 'location.replace' not in html:
  36. return None
  37. else:
  38. return opener
  39. def open_url(url):
  40. logging.debug("openening url: " + url)
  41. site = opener.open(url)
  42. text = site.read()
  43. return text.decode('cp949')
  44. def test_login(html):
  45. if 'Login' in html: # 로그아웃 있으니 login하라고 뜬다
  46. return False
  47. elif 'Logout' in html: # 로그인 있으니 logout할 있다고 뜬다
  48. return True
  49. else:
  50. raise Exception("로그인 여부를 판단할 수 없습니다.")
  51. def massage_html(text):
  52. return text.replace('''<a style="cursor:hand";''', '''<a style="cursor:hand"''')
  53. def parse_order_page(text):
  54. start_pattern = '''<div id="ordList"'''
  55. end_pattern = '''<script language='JavaScript'>'''
  56. start_idx = text.find(start_pattern)
  57. end_idx = text[start_idx:].find(end_pattern) + start_idx
  58. if end_idx == start_idx -1:
  59. raise Exception('cannot find end pattern from %s: %s' % (order_url, end_pattern))
  60. text = text[start_idx:end_idx]
  61. # massage
  62. text = text.replace('''<a style="cursor:hand";''', '''<a style="cursor:hand"''')
  63. # parse
  64. soup = BeautifulSoup(text)
  65. order_list_table = soup.table(id="MyOrderListTbl")[0]
  66. page_navigator_table = soup.table(id="tblNavigator")[0]
  67. # navigation
  68. current_page_anchor = page_navigator_table.find('a', href=None)
  69. next_page_anchor = current_page_anchor.findNextSibling('a')
  70. print 'current page:', current_page_anchor.string
  71. print 'next page:', next_page_anchor["href"]
  72. navi_info = (current_page_anchor.string, next_page_anchor["href"])
  73. # order list
  74. orders = []
  75. remove_bogus_rows = lambda tag: tag.name == u'tr' and len(tag.findAll('td')) != 1
  76. remove_bogus_cell = lambda tag: tag.name == u'td' and tag['width'] != u'1'
  77. for tr in order_list_table.find('tr').findNextSiblings(remove_bogus_rows):
  78. tds = tr.findAll(remove_bogus_cell)
  79. order_id = tds[0].b.string
  80. order_detail_link = get_order_detail_link(order_id)
  81. order_date = tds[1].string
  82. order_name = tds[2].span.string
  83. order_price = tds[3].b.string
  84. pkg_num = tds[3].b.string.next.rsplit('/')[-1]
  85. deliver_state_link = get_deliver_state_link(order_id)
  86. if not str(order_date).startswith(target_month):
  87. continue
  88. #print '-', order_date, order_id, order_name, order_price, pkg_num, deliver_state_link
  89. #print '[%s] %s 에 %s원치(%s개)를 샀습니다: %s' % (order_id, order_date, order_price, pkg_num, order_name)
  90. orders.append( (order_id, order_date, order_price, pkg_num, order_name) )
  91. return (orders, navi_info)
  92. def get_order_detail_link(order_id):
  93. return order_detail_url + "?ordNoH=" + order_id
  94. def get_deliver_state_link(order_id):
  95. return "http://www.yes24.com/Order/FTDelvTrcListFrame.aspx?OID="+order_id+"&TTL=L"
  96. def parse_order_detail_page(text):
  97. start_pattern = '''<span id="infoQuickDlv"'''
  98. end_pattern = '''<script Language=javascript>'''
  99. start_idx = text.find(start_pattern)
  100. end_idx = text[start_idx:].find(end_pattern) + start_idx
  101. if end_idx == start_idx -1:
  102. raise Exception('cannot find end pattern from %s: %s' % (order_url, end_pattern))
  103. text = text[start_idx:end_idx]
  104. # massage
  105. text = text.replace('''<table cellpadding="0" cellspacing=0" border="0" >''', '''<table cellpadding="0" cellspacing="0" border="0" >''')
  106. # parse
  107. soup = BeautifulSoup(text)
  108. order_price = soup.find(id="CLbTotOrdAmt").b.string
  109. text = ''.join(filter(lambda x: '<span id="CLbPayPrInfo">' in x, text.split("\r\n"))).strip()
  110. text = '<table>' + text[text[1:].find('<')+1:-7] + '</table>'
  111. soup = BeautifulSoup(text)
  112. point_saved = soup.find(attrs={'class':"price"}).b.string
  113. if soup.find(attrs={'class':"priceB"}) is not None:
  114. money_spent = soup.find(attrs={'class':"priceB"}).string
  115. else:
  116. money_spent = u'0'
  117. if soup.find(text=re.compile(u'결제.*수단')) is not None:
  118. payment_method = soup.find(text=re.compile(u'결제.*수단')).parent.findNextSibling('td').next.replace("&nbsp;", '').strip()
  119. else:
  120. payment_method = None
  121. find_discount = lambda tag: tag.name == u'td' and \
  122. tag.findNextSibling('td') and \
  123. tag.findNextSibling('td').findNextSibling('td') and \
  124. tag.findNextSibling('td').findNextSibling('td').b and \
  125. tag.findNextSibling('td').findNextSibling('td').b.string != u'0'
  126. discounts = soup.table.table.table.findAll(find_discount)
  127. discounts = map(lambda td: (td.contents[-1], td.findNextSibling('td').findNextSibling('td').b.string), discounts)
  128. # output
  129. print ' *',
  130. if len(discounts) > 0:
  131. if money_spent != u'0':
  132. print u"%s원(%s %s원" % (order_price, payment_method, money_spent),
  133. else:
  134. print u"%s원(%s원" % (order_price, money_spent),
  135. for discount_by, discount_amt in discounts:
  136. print u"+ %s %s원" % (discount_by.strip(), discount_amt),
  137. print u")/ %s점 적립" % (point_saved)
  138. else:
  139. print u"%s원 / %s점 적립" % (order_price, point_saved)
  140. return order_price, point_saved, payment_method, money_spent, discounts
  141. # login
  142. username = raw_input('Username: ')
  143. password = getpass.getpass()
  144. opener = authorize(username, password)
  145. del username, password
  146. # orders
  147. orders = []
  148. path = order_path
  149. while True:
  150. text = open_url(secure_url + path)
  151. partial_orders, (page_no, path) = parse_order_page(text)
  152. if len(partial_orders) == 0 or path is None:
  153. break
  154. logging.debug('%d orders for page %s' % (len(partial_orders), page_no))
  155. orders.extend(partial_orders)
  156. logging.info(len(orders), 'orders')
  157. earliest_date = min(order[1] for order in orders)
  158. latest_date = max(order[1] for order in orders)
  159. prices_sum = sum(int(order[2].replace(",", '')) for order in orders)
  160. pkg_count = sum(int(order[3]) for order in orders)
  161. import locale; locale.setlocale(locale.LC_ALL, '')
  162. print u"%s ~ %s 동안 %d번 주문: 총 %d개, %s원" % (earliest_date, latest_date, len(orders), pkg_count, locale.format("%d", prices_sum, True))
  163. # order details
  164. for i,order in enumerate(orders):
  165. text = open_url(get_order_detail_link(order[0]))
  166. parse_order_detail_page(text)