/createspace-scraper.py

https://github.com/russx2/createspace-scraper · Python · 88 lines · 57 code · 18 blank · 13 comment · 8 complexity · fe44cd085f715850041631ed54c9b552 MD5 · raw file

  1. import re
  2. import sys
  3. import requests
  4. from BeautifulSoup import BeautifulSoup
  5. def get_sales(email, password, date_start, date_end):
  6. """
  7. Returns the number of item sales and the total revenue between
  8. the two dates passed (inclusive) as a tuple
  9. Dates should be in the format YEAR-MONTH-DAY (e.g. 2011-11-30)
  10. """
  11. session = requests.session()
  12. # Login
  13. r = session.post('https://www.createspace.com/LoginProc.do', data = {
  14. 'redirectURL': '',
  15. 'reason': '',
  16. 'Log In': 'action',
  17. 'login': email,
  18. 'password': password
  19. })
  20. # Initialise report (need a valid report ID)
  21. r = session.get('https://www.createspace.com/pub/reports/init.salesdetails.do?msk=mr')
  22. # Looking for the value attribute:
  23. # <input type="hidden" name="value(member.reports.displaysearchid:4)" value="QA5j9Isd" id="member_reports_displaysearchid:4">
  24. match = re.search('member\.reports\.displaysearchid:5\)" value="(\w*)"', r.content)
  25. if not match:
  26. raise Exception('Could not extract token')
  27. exit()
  28. token = match.group(1)
  29. # Kick-off the report server-side
  30. r = session.post('https://www.createspace.com/pub/reports/ajax/search.salesdetails.do', {
  31. 'value(member.reports.dateoptions)': 'CUSTOM',
  32. 'value(member.reports.startdate)': date_start,
  33. 'value(member.reports.enddate)': date_end,
  34. 'value(member.reports.identifieroptions)': 'OTHER',
  35. 'value(member.reports.identifier)': '',
  36. 'value(member.reports.saleschannelsall)': 'SHOW_ALL',
  37. 'value(member.reports.producttypesall)': 'SHOW_ALL',
  38. 'value(member.reports.paymentstatusfilter)': 'SHOW_ALL',
  39. 'value(member.reports.paymentnumber)': '',
  40. 'value(member.reports.displaysearchid:5)': token
  41. })
  42. # Fetch the generated report details
  43. r = session.post('https://www.createspace.com/pub/reports/ajax/table.salesdetails.do?sid=' + token + '&msk=mr')
  44. markup = BeautifulSoup(r.content)
  45. markupHeadingBlock = markup.find('tr', {'class': 'head2'})
  46. totalQuantity = markupHeadingBlock.find(text = re.compile('\d+'))
  47. totalValue = markupHeadingBlock.find(text = re.compile('\$\d+'))
  48. # Cleanup the data
  49. if totalQuantity is None:
  50. totalQuantity = 0
  51. else:
  52. totalQuantity = int(totalQuantity.strip())
  53. if totalValue is None:
  54. totalValue = float(0)
  55. else:
  56. totalValue = float(totalValue.strip().replace('$', ''))
  57. return (totalQuantity, totalValue)
  58. def main():
  59. if len(sys.argv) != 5:
  60. print 'Missing arguments'
  61. exit()
  62. email = sys.argv[1]
  63. password = sys.argv[2]
  64. date_start = sys.argv[3]
  65. date_end = sys.argv[4]
  66. data = get_sales(email, password, date_start, date_end)
  67. print str(data[0]) + ' ' + str(data[1])
  68. if __name__ == '__main__':
  69. main()