PageRenderTime 81ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/fannie/modules/plugins2.0/UnfiInvoiceGrabber/UIGDownload.php

https://github.com/CORE-POS/IS4C
PHP | 330 lines | 203 code | 27 blank | 100 comment | 18 complexity | c5ffd4a1f713f19cb5f2f3943963a659 MD5 | raw file
  1. <?php
  2. /*******************************************************************************
  3. Copyright 2013 Whole Foods Co-op
  4. This file is part of CORE-POS.
  5. IT CORE is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. IT CORE is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. in the file license.txt along with IT CORE; if not, write to the Free Software
  15. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. *********************************************************************************/
  /**
    Intentionally empty class. It exists only so that this file
    complies with the plugin format; all of the work is done by the
    command-line script below.
  */
  class UIGDownload
  {
  }
  /**
    Command-line entry point. Runs only when this file is executed
    directly from the CLI, not when it is merely included.

    Logs in to the UNFI customer portal, reads the list of available
    invoice dates from the invoice download page, requests a zip file
    of invoices for every date that has not been imported yet (or was
    imported within the last seven days), and passes each downloaded
    zip to UIGLib::import().

    Exits with status 1 when a prerequisite is missing (plugin settings,
    cookie file, login page, required fields on the invoice page).
  */
  if (php_sapi_name() === 'cli' && basename($_SERVER['PHP_SELF']) === basename(__FILE__)) {

      include(dirname(__FILE__).'/../../../config.php');
      if (!class_exists('FannieAPI')) {
          include(__DIR__ . '/../../../classlib2.0/FannieAPI.php');
      }
      if (!class_exists('UIGLib')) {
          include('UIGLib.php');
      }

      if (!isset($FANNIE_PLUGIN_SETTINGS['UnfiInvoiceUser']) || !isset($FANNIE_PLUGIN_SETTINGS['UnfiInvoicePass'])) {
          echo "ERROR: UnfiInvoiceUser / UnfiInvoicePass plugin settings are not set\n";
          exit(1);
      }

      $dbc = FannieDB::get($FANNIE_OP_DB);

      $UNFI_USERNAME = $FANNIE_PLUGIN_SETTINGS['UnfiInvoiceUser'];
      $UNFI_PASSWORD = $FANNIE_PLUGIN_SETTINGS['UnfiInvoicePass'];

      $LOGIN_URL = 'https://customers.unfi.com/_login/LoginPage/Login.aspx';
      $IFRAME_DOMAIN = 'https://stsuser.unfi.com';
      $HOME_URL = 'https://customers.unfi.com/_trust/pages/home.aspx';
      // Not requested by this script; see the note after the login steps.
      $SESSION_URL = 'https://stsuser.unfi.com/default.aspx/GetSessionValue';
      $INVOICE_URL = 'https://customers.unfi.com/Pages/ReportDetail.aspx?ReportID=41&ReportName=Invoices%20Download';
      $REPORT_GEN_URL = 'https://customers.unfi.com/_layouts/15/UNFI.UPO.WP.DynamicReportParams/AjaxBridge.aspx/SaveReportParams';

      $cookies = tempnam(sys_get_temp_dir(), 'cj_');
      if ($cookies === false) {
          echo "ERROR: could not create a temporary cookie file\n";
          exit(1);
      }
      // The cookie file ends up holding the portal session cookies, so
      // remove it even when the script stops before the normal cleanup.
      register_shutdown_function(function () use ($cookies) {
          if (is_file($cookies)) {
              unlink($cookies);
          }
      });

      /**
        Create a curl handle carrying the options that every request in
        this script shares: follow redirects and read/write the common
        cookie file.
        @param $url [string] URL to request
        @param $extra [array] additional CURLOPT_* => value pairs, applied
          after the shared options in array order
        @return curl handle, not yet executed

        NOTE(review): certificate verification is switched off here, as it
        was in every request of the original script, even though the login
        request carries the account password. Consider enabling it once
        the host is confirmed to have a usable CA bundle.
      */
      $portalRequest = function ($url, $extra = array()) use ($cookies) {
          $handle = curl_init($url);
          curl_setopt_array($handle, array(
              CURLOPT_SSL_VERIFYPEER => false,
              CURLOPT_FOLLOWLOCATION => true,
              CURLOPT_COOKIEFILE => $cookies,
              CURLOPT_COOKIEJAR => $cookies,
          ));
          curl_setopt_array($handle, $extra);

          return $handle;
      };

      /**
        Step 1:
        Download the login page
      */
      $ch = $portalRequest($LOGIN_URL, array(
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_AUTOREFERER => true,
      ));
      $login_page = curl_exec($ch);
      $login_error = curl_error($ch);
      curl_close($ch);
      if ($login_page === false) {
          // without the hidden form fields the login POST cannot work
          echo "ERROR: could not download the login page (" . $login_error . ")\n";
          exit(1);
      }
      echo "Login (1/4)\n";

      /**
        Get hidden fields from login page
      */
      $inputs_regex = '/<input .*?name="(.+?)" .*?value="(.*?)"/';
      preg_match_all($inputs_regex, $login_page, $matches);
      $login_post = '';
      foreach ($matches[1] as $i => $field_name) {
          $login_post .= $field_name . '=' . urlencode($matches[2][$i]) . '&';
      }

      /**
        add username and password
      */
      $login_post .= 'userName='.urlencode($UNFI_USERNAME);
      $login_post .= '&Password='.urlencode($UNFI_PASSWORD);

      /**
        POST login info back to login page
      */
      $ch = $portalRequest($LOGIN_URL, array(
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_AUTOREFERER => true,
          CURLOPT_POST => true,
          CURLOPT_POSTFIELDS => $login_post,
          CURLOPT_HEADER => true,
      ));
      $body = curl_exec($ch);
      $referer = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
      curl_close($ch);
      echo "Login (2/4)\n";

      /**
        Find iframes in the resulting page

        Need to download the iframe, which contains
        a big XML token, then POST that token to
        the home URL

        Note:
        The Referer header field is required when downloading
        the iframe. If that header isn't set, you won't get a valid
        result.

        Posting the token to the home URL returns an HTTP 403
        and a page saying you need to login first. This is not
        accurate. Subsequent requests will be logged in. For that
        reason the HTTP status of these requests is not checked.
      */
      $iframe_regex = '/<iframe .*src="(.*?)"/';
      preg_match_all($iframe_regex, (string)$body, $matches);
      foreach ($matches[1] as $url) {
          $full_url = $IFRAME_DOMAIN . '/' . $url;
          $ch = $portalRequest($full_url, array(
              CURLOPT_RETURNTRANSFER => true,
              CURLOPT_AUTOREFERER => true,
              CURLOPT_HEADER => true,
              CURLOPT_REFERER => $referer,
          ));
          $iframe = curl_exec($ch);
          curl_close($ch);
          echo "Login (3/4)\n";

          preg_match_all($inputs_regex, (string)$iframe, $fields);
          $pairs = array();
          foreach ($fields[1] as $i => $field_name) {
              // complication: undo the html encoding in the xml
              // (e.g., &lt;) and then re-encode for the url (e.g., %3C)
              $pairs[] = $field_name . '=' . urlencode(htmlspecialchars_decode($fields[2][$i]));
          }
          $post_data = implode('&', $pairs);

          $ch = $portalRequest($HOME_URL, array(
              CURLOPT_RETURNTRANSFER => true,
              CURLOPT_AUTOREFERER => true,
              CURLOPT_HEADER => true,
              CURLOPT_POST => true,
              CURLOPT_POSTFIELDS => $post_data,
              CURLOPT_REFERER => $full_url,
          ));
          $body = curl_exec($ch);
          // the next iframe request, if any, uses this as its Referer
          $referer = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
          curl_close($ch);
          echo "Login (4/4)\n";
      }

      /**
        Requesting the session value ($SESSION_URL, an empty POST)
        isn't necessary. Using a browser does this but the script
        never gets a valid result and it doesn't seem to matter,
        so the request is not made.
      */

      /**
        Get invoice download page
      */
      $ch = $portalRequest($INVOICE_URL, array(
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_HEADER => true,
      ));
      $invoice_page = (string)curl_exec($ch);
      curl_close($ch);
      echo "Getting available dates\n";

      /**
        Extract available dates
        They're now embedded in javascript as JSON list
        of objects instead of being in a <select> field.
        The first list whose first entry has a parseable
        date in its Text property is used.
      */
      $dates = array();
      $json_regex = '/dataSource: (\[.*?\])/';
      preg_match_all($json_regex, $invoice_page, $matches);
      foreach ($matches[1] as $match) {
          $data = json_decode($match);
          // ignore anything that is not a non-empty list of objects
          // carrying a Text string (invalid JSON decodes to null)
          if (!is_array($data) || !isset($data[0]->Text) || !is_string($data[0]->Text)) {
              continue;
          }
          if (strtotime($data[0]->Text)) {
              $dates = $data;
              break;
          }
      }

      /**
        Extract inputs by id
        They contain some useful information for the
        actual downloads.
      */
      $id_regex = '/<input .*?id="(.+?)" .*?value="(.*?)"/';
      preg_match_all($id_regex, $invoice_page, $matches);
      $inputs = array();
      foreach ($matches[1] as $i => $input_id) {
          $inputs[$input_id] = $matches[2][$i];
      }
      // I think only this one needs to be decoded
      $inputs['claims'] = isset($inputs['claims'])
          ? json_decode(htmlspecialchars_decode($inputs['claims']))
          : null;

      if (count($dates) === 0) {
          echo "No invoice dates found on the invoice page\n";
      } else {
          // every report request below needs these values; stop with a
          // clear message rather than sending a malformed request
          $missing = array();
          foreach (array('hdnCustomerNumber', 'hdnReportPath', 'hfTokValidator') as $required) {
              if (!isset($inputs[$required])) {
                  $missing[] = $required;
              }
          }
          if (!is_object($inputs['claims'])) {
              $missing[] = 'claims';
          }
          if (count($missing) > 0) {
              echo "ERROR: invoice page is missing expected fields: " . implode(', ', $missing) . "\n";
              exit(1);
          }
      }

      $check = $dbc->prepare('SELECT orderID FROM PurchaseOrder WHERE vendorID=? and userID=0
          AND creationDate=? AND placedDate=?');
      foreach ($dates as $date) {
          if (!isset($date->Text, $date->Value)) {
              continue;
          }
          $stamp = strtotime($date->Text);
          if ($stamp === false) {
              echo "Skipping " . $date->Text . " (unrecognized date)\n";
              continue;
          }

          $good_date = date('Y-m-d', $stamp);
          $doCheck = $dbc->execute($check, array(1, $good_date, $good_date));
          $already_imported = $dbc->num_rows($doCheck) > 0;
          $diff = time() - $stamp;
          $repeat = false;
          if ($already_imported && $diff > (7 * 24 * 60 * 60)) {
              // imported and more than a week old: leave it alone
              echo "Skipping " . $date->Text . " (already imported)\n";
              continue;
          } elseif ($already_imported) {
              echo "Redownloading " . $date->Text . "\n";
              $repeat = true;
          }

          /**
            POST a JSON value to request a particular report
            The response will be a simple JSON object containing
            the actual file URL.
            { "d" : "http://customer.unfi.com/path/to/file.zip" }
          */
          $cv = 'CustomerNumber->>' . $inputs['hdnCustomerNumber'];
          $cv .= '||InvoiceDate->>' . $date->Value;
          $cv .= '||SelectedChain->>' . $inputs['hdnCustomerNumber'];
          $cv .= '||Delimiter->>csv||Hyphen->>0';
          $cv .= '||ReportPath->>' . $inputs['hdnReportPath'];
          $json_request = array(
              'ControlsAndValues' => $cv,
              'ReportOptions' => 'zip',
              'userID' => isset($inputs['claims']->UserId) ? $inputs['claims']->UserId : null,
              // sent as a string: the request body carries "reportID":"41"
              'reportID' => '41',
              'customerNumber' => $inputs['hdnCustomerNumber'],
              'emailAddress' => isset($inputs['claims']->EmailAddress) ? $inputs['claims']->EmailAddress : null,
              'chainAccounts' => '',
              'actionType' => 'Save',
          );

          // not sure if this is actually needed
          // browser ends up with this cookie
          // (Netscape cookie file line; the expiry field is a plain 0)
          file_put_contents(
              $cookies,
              "customers.unfi.com\tFALSE\t/\tFALSE\t0\tWSS_FullScreenMode\tfalse\n",
              FILE_APPEND
          );

          $json = json_encode($json_request);
          // strip the backslashes json_encode adds (e.g., before slashes)
          $json = str_replace("\\", '', $json);

          // authorization is definitely needed; the rest may
          // or may not be. Debugging took awhile
          $ch = $portalRequest($REPORT_GEN_URL, array(
              CURLOPT_RETURNTRANSFER => true,
              CURLOPT_REFERER => $INVOICE_URL,
              CURLOPT_POST => true,
              CURLOPT_POSTFIELDS => $json,
              CURLOPT_HTTPHEADER => array(
                  "Content-Type: application/json; charset=utf-8",
                  'Authorization: ' . $inputs['hfTokValidator'],
                  'X-Requested-With: XMLHttpRequest',
                  'Accept: application/json, text/javascript, */*; q=0.01',
                  'Accept-Language: en-US,en;q=0.5',
                  'User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:28.0) Gecko/20100101 Firefox/28.0',
                  'Pragma: no-cache',
                  'Cache-Control: no-cache',
              ),
          ));
          $gen_report = curl_exec($ch);
          curl_close($ch);

          $response = json_decode((string)$gen_report);
          if (is_object($response) && !empty($response->d)) {
              echo "Downloading " . $date->Text . "...\n";
              $filename = str_replace('/','-',$date->Text).'.zip';
              $fp = fopen($filename, 'w');
              if ($fp === false) {
                  echo "ERROR: could not open " . $filename . " for writing\n";
              } else {
                  // with CURLOPT_FILE the body is written straight to $fp
                  // and curl_exec only reports success or failure
                  $ch = $portalRequest($response->d, array(
                      CURLOPT_FILE => $fp,
                  ));
                  $downloaded = curl_exec($ch);
                  curl_close($ch);
                  fclose($fp);

                  if ($downloaded === false) {
                      echo "ERROR: DOWNLOAD FAILED!\n";
                      // an incomplete zip is of no use to the importer
                      unlink($filename);
                  } else {
                      echo "Importing invoices for " . $date->Text . "\n";
                      if (UIGLib::import($filename, $repeat) === true) {
                          unlink($filename);
                      } else {
                          // the zip is left in place after a failed import
                          echo "ERROR: IMPORT FAILED!\n";
                      }
                  }
              }
          } else {
              echo "ERROR: no download URL returned for " . $date->Text . "\n";
          }

          // only download one day for now
          // remove when done testing
          //break;

          // politeness; pause between requests
          sleep(15);
      }

      /**
        Cleanup: delete cookie file
        (the shutdown function covers the early-exit paths)
      */
      unlink($cookies);
  }