PageRenderTime 168ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/DynamicPricingAlgo .ipynb

https://gitlab.com/thiagarajan.saravanan/dynamic-pricer
Jupyter | 496 lines | 496 code | 0 blank | 0 comment | 0 complexity | 009e42546622f3fcc6d6fcbc22ba8029 MD5 | raw file
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 6,
  6. "metadata": {
  7. "collapsed": false
  8. },
  9. "outputs": [
  10. {
  11. "data": {
  12. "text/plain": [
  13. "<pandas.io.excel.ExcelFile at 0x114f008d0>"
  14. ]
  15. },
  16. "execution_count": 6,
  17. "metadata": {},
  18. "output_type": "execute_result"
  19. }
  20. ],
  21. "source": [
  22. "import matplotlib.pyplot as plt\n",
  23. "import numpy as np\n",
  24. "import pandas as pd\n",
  25. "data = pd.ExcelFile(\"Breakfast.xlsx\")\n",
  26. "data"
  27. ]
  28. },
  29. {
  30. "cell_type": "code",
  31. "execution_count": 8,
  32. "metadata": {
  33. "collapsed": false
  34. },
  35. "outputs": [],
  36. "source": [
  37. "df = data.parse('dh Transaction Data')\n"
  38. ]
  39. },
  40. {
  41. "cell_type": "code",
  42. "execution_count": 30,
  43. "metadata": {
  44. "collapsed": false
  45. },
  46. "outputs": [],
  47. "source": [
  48. "df1 = df.ix[1:]"
  49. ]
  50. },
  51. {
  52. "cell_type": "code",
  53. "execution_count": 1,
  54. "metadata": {
  55. "collapsed": false
  56. },
  57. "outputs": [
  58. {
  59. "data": {
  60. "text/plain": [
  61. "4"
  62. ]
  63. },
  64. "execution_count": 1,
  65. "metadata": {},
  66. "output_type": "execute_result"
  67. }
  68. ],
  69. "source": [
  70. "'''##################Testing Similarity between Matlab & R#################\n",
  71. "\n",
  72. "################## Indexing similarities & Differences #################\n",
  73. "length(value)\n",
  74. "i=4\n",
  75. "Bolts[1:i,1:i]\n",
  76. "\n",
  77. "Bolts[':',1:i]#To be typed within quotes is the difference\n",
  78. "\n",
  79. "Bolts[c(1,3),c(1,3)]#Need to check for this functionality in matlab\n",
  80. "'''\n",
  81. "4"
  82. ]
  83. },
  84. {
  85. "cell_type": "code",
  86. "execution_count": 28,
  87. "metadata": {
  88. "collapsed": false
  89. },
  90. "outputs": [
  91. {
  92. "data": {
  93. "text/html": [
  94. "<div>\n",
  95. "<table border=\"1\" class=\"dataframe\">\n",
  96. " <thead>\n",
  97. " <tr style=\"text-align: right;\">\n",
  98. " <th></th>\n",
  99. " <th>Unnamed: 1</th>\n",
  100. " <th>Unnamed: 2</th>\n",
  101. " <th>Unnamed: 3</th>\n",
  102. " </tr>\n",
  103. " </thead>\n",
  104. " <tbody>\n",
  105. " <tr>\n",
  106. " <th>1</th>\n",
  107. " <td>367</td>\n",
  108. " <td>1111009477</td>\n",
  109. " <td>13</td>\n",
  110. " </tr>\n",
  111. " <tr>\n",
  112. " <th>2</th>\n",
  113. " <td>367</td>\n",
  114. " <td>1111009497</td>\n",
  115. " <td>20</td>\n",
  116. " </tr>\n",
  117. " <tr>\n",
  118. " <th>3</th>\n",
  119. " <td>367</td>\n",
  120. " <td>1111009507</td>\n",
  121. " <td>14</td>\n",
  122. " </tr>\n",
  123. " <tr>\n",
  124. " <th>4</th>\n",
  125. " <td>367</td>\n",
  126. " <td>1111035398</td>\n",
  127. " <td>4</td>\n",
  128. " </tr>\n",
  129. " <tr>\n",
  130. " <th>5</th>\n",
  131. " <td>367</td>\n",
  132. " <td>1111038078</td>\n",
  133. " <td>3</td>\n",
  134. " </tr>\n",
  135. " </tbody>\n",
  136. "</table>\n",
  137. "</div>"
  138. ],
  139. "text/plain": [
  140. " Unnamed: 1 Unnamed: 2 Unnamed: 3\n",
  141. "1 367 1111009477 13\n",
  142. "2 367 1111009497 20\n",
  143. "3 367 1111009507 14\n",
  144. "4 367 1111035398 4\n",
  145. "5 367 1111038078 3"
  146. ]
  147. },
  148. "execution_count": 28,
  149. "metadata": {},
  150. "output_type": "execute_result"
  151. }
  152. ],
  153. "source": [
  154. "#df1.loc['d':,'A':'C']\n",
  155. "#df.loc[1:5,'Unnamed: 1':'Unnamed: 3']\n",
  156. "#Removing the 1st row\n",
  157. "3\n"
  158. ]
  159. },
  160. {
  161. "cell_type": "code",
  162. "execution_count": 8,
  163. "metadata": {
  164. "collapsed": false
  165. },
  166. "outputs": [],
  167. "source": [
  168. "\n",
  169. "#Connecting to Mongo\n",
  170. "'''\n",
  171. "Method 1\n",
  172. "import pandas as pd\n",
  173. "from pymongo import MongoClient\n",
  174. "\n",
  175. "\n",
  176. "def _connect_mongo(host, port, username, password, db):\n",
  177. " \"\"\" A util for making a connection to mongo \"\"\"\n",
  178. "\n",
  179. " if username and password:\n",
  180. " mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db)\n",
  181. " conn = MongoClient(mongo_uri)\n",
  182. " else:\n",
  183. " conn = MongoClient(host, port)\n",
  184. "\n",
  185. "\n",
  186. " return conn[db]\n",
  187. "\n",
  188. "\n",
  189. "def read_mongo(db, collection, query={}, host='localhost', port=27017, username=None, password=None, no_id=True):\n",
  190. " \"\"\" Read from Mongo and Store into DataFrame \"\"\"\n",
  191. "\n",
  192. " # Connect to MongoDB\n",
  193. " db = _connect_mongo(host=host, port=port, username=username, password=password, db=db)\n",
  194. "\n",
  195. " # Make a query to the specific DB and Collection\n",
  196. " cursor = db[collection].find(query)\n",
  197. "\n",
  198. " # Expand the cursor and construct the DataFrame\n",
  199. " df = pd.DataFrame(list(cursor))\n",
  200. "\n",
  201. " # Delete the _id\n",
  202. " if no_id:\n",
  203. " del df['_id']\n",
  204. "\n",
  205. " return df\n",
  206. "'''\n",
  207. "'''\n",
  208. "#Method 2\n",
  209. "\n",
  210. "import pymongo\n",
  211. "import pandas as pd\n",
  212. "from pymongo import Connection\n",
  213. "connection = Connection()\n",
  214. "db = connection.immibytes\n",
  215. "input_data = db.myCollection\n",
  216. "data = pd.DataFrame(list(input_data.find()))\n",
  217. "'''\n",
  218. "#Method 3\n",
  219. "import matplotlib.pyplot as plt\n",
  220. "import numpy as np\n",
  221. "import pandas as pd\n",
  222. "\n",
  223. "\n",
  224. "from pymongo import MongoClient\n",
  225. "uri = \"mongodb://poc_user:pocatimmibytes@ec2-52-37-213-141.us-west-2.compute.amazonaws.com/immibytes\";\n",
  226. "client = MongoClient(uri)\n",
  227. "db = client.immibytes\n",
  228. "input_data = db.myCollection\n",
  229. "#cursor=db['myCollection'].find({})\n",
  230. "#list(db.myCollection.find({}))\n"
  231. ]
  232. },
  233. {
  234. "cell_type": "code",
  235. "execution_count": 1,
  236. "metadata": {
  237. "collapsed": false
  238. },
  239. "outputs": [],
  240. "source": [
  241. "#df = pd.DataFrame(list(db.myCollection.find({})))\n",
  242. "import pymongo\n",
  243. "import pandas as pd\n",
  244. "#from pymongo import Connection\n",
  245. "from pymongo import MongoClient\n",
  246. "c = MongoClient()\n",
  247. "#from mogo import connect as PyConnection\n",
  248. "#connection = Connection()\n",
  249. "db = c.immibytes\n",
  250. "\n",
  251. "#input_data = db.transaction_50k.find({\"PROD_CODE_40\":\"D00001\"})\n",
  252. "\n",
  253. "#input_data = db.transaction_50k"
  254. ]
  255. },
  256. {
  257. "cell_type": "code",
  258. "execution_count": 99,
  259. "metadata": {
  260. "collapsed": false
  261. },
  262. "outputs": [],
  263. "source": [
  264. "#D1 = db['D1'].find({})\n",
  265. " \n",
  266. "#D4 = db['D4'].find({})\n",
  267. "#D5 = db['D5'].find({})\n",
  268. "#D6 = db['D6'].find({})\n",
  269. "#D7 = db['D7'].find({})\n",
  270. "#D8 = db['D8'].find({})\n",
  271. "D9 = db['D9'].find({})"
  272. ]
  273. },
  274. {
  275. "cell_type": "code",
  276. "execution_count": 100,
  277. "metadata": {
  278. "collapsed": false
  279. },
  280. "outputs": [],
  281. "source": [
  282. "#df3 = pd.DataFrame(list(input_data3))\n",
  283. "\n",
  284. "#PD1=pd.DataFrame(list(D1))\n",
  285. "\n",
  286. "#PD4=pd.DataFrame(list(D4))\n",
  287. "#PD5=pd.DataFrame(list(D5))\n",
  288. "#PD6=pd.DataFrame(list(D6))\n",
  289. "#PD7=pd.DataFrame(list(D7))\n",
  290. "#PD8=pd.DataFrame(list(D8))\n",
  291. "#PD9=pd.DataFrame(list(D9))"
  292. ]
  293. },
  294. {
  295. "cell_type": "code",
  296. "execution_count": 122,
  297. "metadata": {
  298. "collapsed": false
  299. },
  300. "outputs": [
  301. {
  302. "data": {
  303. "text/plain": [
  304. "5"
  305. ]
  306. },
  307. "execution_count": 122,
  308. "metadata": {},
  309. "output_type": "execute_result"
  310. }
  311. ],
  312. "source": [
  313. "#df2\n",
  314. "#PD9\n",
  315. "5"
  316. ]
  317. },
  318. {
  319. "cell_type": "code",
  320. "execution_count": 154,
  321. "metadata": {
  322. "collapsed": false
  323. },
  324. "outputs": [],
  325. "source": [
  326. "#PD1.to_csv('Data1.csv')\n",
  327. "#PD2.to_csv('Data2.csv')\n",
  328. "#D2G4.to_csv('Data2G4.csv')\n",
  329. "#D2G5.to_csv('Data2G5.csv')\n",
  330. "#D2G6.to_csv('Data2G6.csv')\n",
  331. "#D2G7.to_csv('Data2G7.csv')-Not yet done\n",
  332. "#D2G8.to_csv('Data2G8.csv')\n",
  333. "#D2G9.to_csv('Data2G9.csv')\n",
  334. "\n",
  335. "#D3G10.to_csv('Data3G10.csv')\n",
  336. "#D3G11.to_csv('Data3G11.csv')\n",
  337. "#D3G12.to_csv('Data3G12.csv')\n",
  338. "#D3G13.to_csv('Data3G13.csv')\n",
  339. "#D3G14.to_csv('Data3G14.csv')\n",
  340. "#D3G15.to_csv('Data3G15.csv')\n",
  341. "D3G16.to_csv('Data3G16.csv')\n",
  342. "\n",
  343. "\n",
  344. "#PD4.to_csv('Data4.csv')\n",
  345. "#PD5.to_csv('Data5.csv')\n",
  346. "#PD6.to_csv('Data6.csv')\n",
  347. "#PD7.to_csv('Data7.csv')\n",
  348. "#PD8.to_csv('Data8.csv')\n",
  349. "#PD9.to_csv('Data9.csv')\n"
  350. ]
  351. },
  352. {
  353. "cell_type": "code",
  354. "execution_count": 153,
  355. "metadata": {
  356. "collapsed": false
  357. },
  358. "outputs": [],
  359. "source": [
  360. "#list(db.transaction_5k.find({}))\n",
  361. "#DDist1=pd.DataFrame(list(db.D3.distinct( \"PROD_CODE_30\" )))\n",
  362. "#D2G4 = pd.DataFrame(list(db.D2.find({\"PROD_CODE_30\":\"G00004\"})))\n",
  363. "#D2G5 = pd.DataFrame(list(db.D2.find({\"PROD_CODE_30\":\"G00005\"})))\n",
  364. "#D2G6 = pd.DataFrame(list(db.D2.find({\"PROD_CODE_30\":\"G00006\"})))\n",
  365. "#D2G7 = pd.DataFrame(list(db.D2.find({\"PROD_CODE_30\":\"G00007\"})))\n",
  366. "#D2G8 = pd.DataFrame(list(db.D2.find({\"PROD_CODE_30\":\"G00008\"})))\n",
  367. "#D2G9 = pd.DataFrame(list(db.D2.find({\"PROD_CODE_30\":\"G00009\"})))\n",
  368. "\n",
  369. "#D3G10 = pd.DataFrame(list(db.D3.find({\"PROD_CODE_30\":\"G00010\"})))\n",
  370. "#D3G11 = pd.DataFrame(list(db.D3.find({\"PROD_CODE_30\":\"G00011\"})))\n",
  371. "#D3G12 = pd.DataFrame(list(db.D3.find({\"PROD_CODE_30\":\"G00012\"})))\n",
  372. "#D3G13 = pd.DataFrame(list(db.D3.find({\"PROD_CODE_30\":\"G00013\"})))\n",
  373. "#D3G14 = pd.DataFrame(list(db.D3.find({\"PROD_CODE_30\":\"G00014\"})))\n",
  374. "#D3G15 = pd.DataFrame(list(db.D3.find({\"PROD_CODE_30\":\"G00015\"})))\n",
  375. "#D3G16 = pd.DataFrame(list(db.D3.find({\"PROD_CODE_30\":\"G00016\"})))\n"
  376. ]
  377. },
  378. {
  379. "cell_type": "code",
  380. "execution_count": 37,
  381. "metadata": {
  382. "collapsed": false
  383. },
  384. "outputs": [],
  385. "source": [
  386. "df.to_csv('Data2.csv')"
  387. ]
  388. },
  389. {
  390. "cell_type": "code",
  391. "execution_count": null,
  392. "metadata": {
  393. "collapsed": true
  394. },
  395. "outputs": [],
  396. "source": []
  397. },
  398. {
  399. "cell_type": "code",
  400. "execution_count": 105,
  401. "metadata": {
  402. "collapsed": false
  403. },
  404. "outputs": [
  405. {
  406. "data": {
  407. "text/plain": [
  408. "3"
  409. ]
  410. },
  411. "execution_count": 105,
  412. "metadata": {},
  413. "output_type": "execute_result"
  414. }
  415. ],
  416. "source": [
  417. "#df1.loc['d':,'A':'C']\n",
  418. "#df.loc[1:5,'Unnamed: 1':'Unnamed: 3']\n",
  419. "#Removing the 1st row\n",
  420. "\n",
  421. "df.loc[1:5,'name':'x']\n"
  422. ]
  423. },
  424. {
  425. "cell_type": "code",
  426. "execution_count": 106,
  427. "metadata": {
  428. "collapsed": false
  429. },
  430. "outputs": [
  431. {
  432. "data": {
  433. "text/plain": [
  434. "3"
  435. ]
  436. },
  437. "execution_count": 106,
  438. "metadata": {},
  439. "output_type": "execute_result"
  440. }
  441. ],
  442. "source": [
  443. "#Changing the column header\n",
  444. "new_header = df.iloc[0] #grab the first row for the header\n",
  445. "df = df[1:] #take the data less the header row\n",
  446. "df.rename(columns = new_header) #set the header row as the df header\n",
  447. "# Get all the relevant fields\n",
  448. "\n",
  449. "\n",
  450. "\n",
  451. "#Calculate the Elasticity variable\n",
  452. "\n",
  453. "\n",
  454. "\n",
  455. "\n",
  456. "\n",
  457. "#Modeling \n",
  458. "\n",
  459. "\n",
  460. "\n",
  461. "#Transformations\n",
  462. "\n"
  463. ]
  464. },
  465. {
  466. "cell_type": "code",
  467. "execution_count": null,
  468. "metadata": {
  469. "collapsed": true
  470. },
  471. "outputs": [],
  472. "source": []
  473. }
  474. ],
  475. "metadata": {
  476. "kernelspec": {
  477. "display_name": "Python 3",
  478. "language": "python",
  479. "name": "python3"
  480. },
  481. "language_info": {
  482. "codemirror_mode": {
  483. "name": "ipython",
  484. "version": 3
  485. },
  486. "file_extension": ".py",
  487. "mimetype": "text/x-python",
  488. "name": "python",
  489. "nbconvert_exporter": "python",
  490. "pygments_lexer": "ipython3",
  491. "version": "3.5.1"
  492. }
  493. },
  494. "nbformat": 4,
  495. "nbformat_minor": 0
  496. }