PageRenderTime 26ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/ipython_notebooks/2015_10/2015_10_26_Distance_effects.ipynb

https://gitlab.com/jdebelius/Absloute-Power
Jupyter | 527 lines | 527 code | 0 blank | 0 comment | 0 complexity | 8d2bf8f09068b6dd636a3db035866a6b MD5 | raw file
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "**Author**: J W Debelius<br>\n",
  8. "**Date**: 26 October 2015<br>\n",
  9. "**Virtualenv**: power\\_play"
  10. ]
  11. },
  12. {
  13. "cell_type": "code",
  14. "execution_count": 1,
  15. "metadata": {
  16. "collapsed": true
  17. },
  18. "outputs": [],
  19. "source": [
  20. "import numpy as np\n",
  21. "import matplotlib.pyplot as plt\n",
  22. "import scipy\n",
  23. "import pandas as pd\n",
  24. "import skbio\n",
  25. "\n",
  26. "import absloute_power.distance as dist"
  27. ]
  28. },
  29. {
  30. "cell_type": "markdown",
  31. "metadata": {},
  32. "source": [
  33. "I'm going to simulate distances between locations that should be significantly different when the two categories are compared, apply PERMANOVA and ANOSIM (citations needed), and then determine how to better evaluate effect size on microbiome data."
  34. ]
  35. },
  36. {
  37. "cell_type": "code",
  38. "execution_count": null,
  39. "metadata": {
  40. "collapsed": true
  41. },
  42. "outputs": [],
  43. "source": [
  44. "def simulate_distance(num_samps, within1=(0.01, 0.25), within2=None, between=None, var=None):\n",
  45. "    \"\"\"Draws a random two-group assignment (unfinished; distances are not simulated yet).\"\"\"\n",
  46. "    # Simulates the mapping file: each sample lands in group 0 or 1 with p=0.5\n",
  47. "    grouping = np.random.binomial(1, 0.5, (num_samps))\n",
  48. "\n",
  49. "    # Determines the number of observations in each group\n",
  50. "    num1 = grouping.sum()\n",
  51. "    num0 = num_samps - num1\n",
  52. "    # TODO: simulate the within- and between-group distances (never written)"
  53. ]
  54. },
  55. {
  56. "cell_type": "code",
  57. "execution_count": 222,
  58. "metadata": {
  59. "collapsed": true
  60. },
  61. "outputs": [],
  62. "source": [
  63. "def simulate_poisson_distance(length, scale, param):\n",
  64. "    return scale * np.random.poisson(lam=param, size=length)  # Poisson(param) draws, rescaled"
  65. ]
  66. },
  67. {
  68. "cell_type": "code",
  69. "execution_count": 221,
  70. "metadata": {
  71. "collapsed": false
  72. },
  73. "outputs": [
  74. {
  75. "data": {
  76. "text/plain": [
  77. "array([3, 4, 3, 2, 4])"
  78. ]
  79. },
  80. "execution_count": 221,
  81. "metadata": {},
  82. "output_type": "execute_result"
  83. }
  84. ],
  85. "source": [
  86. "np.random.poisson(3, 5)"
  87. ]
  88. },
  89. {
  90. "cell_type": "code",
  91. "execution_count": 16,
  92. "metadata": {
  93. "collapsed": false
  94. },
  95. "outputs": [],
  96. "source": [
  97. "locations = np.random.binomial(1, 0.5, (30))\n",
  98. "locations.sort()"
  99. ]
  100. },
  101. {
  102. "cell_type": "code",
  103. "execution_count": 19,
  104. "metadata": {
  105. "collapsed": false
  106. },
  107. "outputs": [],
  108. "source": [
  109. "within0 = 0.15\n",
  110. "within1 = 0.2\n",
  111. "between = 0.5"
  112. ]
  113. },
  114. {
  115. "cell_type": "code",
  116. "execution_count": 20,
  117. "metadata": {
  118. "collapsed": false
  119. },
  120. "outputs": [
  121. {
  122. "data": {
  123. "text/plain": [
  124. "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,\n",
  125. " 1, 1, 1, 1, 1, 1, 1])"
  126. ]
  127. },
  128. "execution_count": 20,
  129. "metadata": {},
  130. "output_type": "execute_result"
  131. }
  132. ],
  133. "source": [
  134. "locations"
  135. ]
  136. },
  137. {
  138. "cell_type": "code",
  139. "execution_count": 25,
  140. "metadata": {
  141. "collapsed": true
  142. },
  143. "outputs": [],
  144. "source": [
  145. "num0 = np.sum(locations == 0)\n",
  146. "num1 = np.sum(locations == 1)"
  147. ]
  148. },
  149. {
  150. "cell_type": "code",
  151. "execution_count": 55,
  152. "metadata": {
  153. "collapsed": false
  154. },
  155. "outputs": [],
  156. "source": [
  157. "in0 = np.random.uniform(0, 1, num0 * (num0 - 1) // 2) * within0"
  158. ]
  159. },
  160. {
  161. "cell_type": "code",
  162. "execution_count": 142,
  163. "metadata": {
  164. "collapsed": true
  165. },
  166. "outputs": [],
  167. "source": [
  168. "num0 = 4"
  169. ]
  170. },
  171. {
  172. "cell_type": "code",
  173. "execution_count": 174,
  174. "metadata": {
  175. "collapsed": false
  176. },
  177. "outputs": [
  178. {
  179. "name": "stdout",
  180. "output_type": "stream",
  181. "text": [
  182. "vec [1]\n",
  183. "pos1 [1]\n",
  184. "pos2 [0]\n",
  185. "[[ 0. 1.]\n",
  186. " [ 1. 0.]]\n"
  187. ]
  188. }
  189. ],
  190. "source": [
  191. "num0 = 2\n",
  192. "vec = np.arange((np.square(num0)-1)/2) + 1\n",
  193. "dm0 = np.zeros((num0, num0))\n",
  194. "print 'vec', vec\n",
  195. "\n",
  196. "pos_count = 0\n",
  197. "dm0 = np.zeros((num0, num0))\n",
  198. "for i in xrange(num0-1):\n",
  199. " pos1 = np.arange(i+1, num0)\n",
  200. " print 'pos1', pos1\n",
  201. " pos2 = np.arange(i, num0-1) + pos_count\n",
  202. " print 'pos2', pos2\n",
  203. " pos_count = pos_count + len(pos2) - 1\n",
  204. " dm0[i, pos1] = vec[pos2]\n",
  205. " dm0[pos1, i] = vec[pos2]\n",
  206. "print dm0"
  207. ]
  208. },
  209. {
  210. "cell_type": "code",
  211. "execution_count": 179,
  212. "metadata": {
  213. "collapsed": false
  214. },
  215. "outputs": [
  216. {
  217. "name": "stdout",
  218. "output_type": "stream",
  219. "text": [
  220. "vec [1 2 3 4]\n",
  221. "pos1 [1 2]\n",
  222. "pos2 [0 1]\n",
  223. "pos1 [2]\n",
  224. "pos2 [2]\n",
  225. "[[ 0. 1. 2.]\n",
  226. " [ 1. 0. 3.]\n",
  227. " [ 2. 3. 0.]]\n"
  228. ]
  229. }
  230. ],
  231. "source": [
  232. "num0 = 3\n",
  233. "vec = np.arange((np.square(num0)-1)/2) + 1\n",
  234. "dm0 = np.zeros((num0, num0))\n",
  235. "print 'vec', vec\n",
  236. "\n",
  237. "pos_count = 0\n",
  238. "dm0 = np.zeros((num0, num0))\n",
  239. "for i in xrange(num0-1):\n",
  240. " pos1 = np.arange(i+1, num0)\n",
  241. " print 'pos1', pos1\n",
  242. " pos2 = np.arange(i, num0-1) + pos_count\n",
  243. " print 'pos2', pos2\n",
  244. " pos_count = pos_count + len(pos2) - 1\n",
  245. " dm0[i, pos1] = vec[pos2]\n",
  246. " dm0[pos1, i] = vec[pos2]\n",
  247. "print dm0"
  248. ]
  249. },
  250. {
  251. "cell_type": "code",
  252. "execution_count": 181,
  253. "metadata": {
  254. "collapsed": false
  255. },
  256. "outputs": [
  257. {
  258. "name": "stdout",
  259. "output_type": "stream",
  260. "text": [
  261. "vec [1 2 3 4 5 6 7]\n",
  262. "[[ 0. 1. 2. 3.]\n",
  263. " [ 1. 0. 4. 5.]\n",
  264. " [ 2. 4. 0. 6.]\n",
  265. " [ 3. 5. 6. 0.]]\n"
  266. ]
  267. }
  268. ],
  269. "source": [
  270. "num0 = 4\n",
  271. "vec = np.arange((np.square(num0)-1)/2) + 1\n",
  272. "dm0 = np.zeros((num0, num0))\n",
  273. "print 'vec', vec\n",
  274. "\n",
  275. "pos_count = 0\n",
  276. "dm0 = np.zeros((num0, num0))\n",
  277. "for i in xrange(num0-1):\n",
  278. " pos1 = np.arange(i+1, num0)\n",
  279. " pos2 = np.arange(i, num0-1) + pos_count\n",
  280. " pos_count = pos_count + len(pos2) - 1\n",
  281. " dm0[i, pos1] = vec[pos2]\n",
  282. " dm0[pos1, i] = vec[pos2]\n",
  283. "print dm0"
  284. ]
  285. },
  286. {
  287. "cell_type": "code",
  288. "execution_count": 239,
  289. "metadata": {
  290. "collapsed": false
  291. },
  292. "outputs": [],
  293. "source": [
  294. "def convert_to_dm(length, vec):\n",
  295. "    \"\"\"Converts a condensed distance vector to a square, symmetric matrix\n",
  296. "\n",
  297. "    `vec` holds the upper-triangle distances in row-major order, given\n",
  298. "    either as one array or as a list of arrays to be concatenated. Only\n",
  299. "    the first length * (length - 1) / 2 values are used.\n",
  300. "    \"\"\"\n",
  301. "    # Stacks the data\n",
  302. "    vec = np.hstack(vec)\n",
  303. "    num_dist = length * (length - 1) // 2\n",
  304. "    if len(vec) < num_dist:\n",
  305. "        raise ValueError('vec must supply at least %i distances' % num_dist)\n",
  306. "\n",
  307. "    # Fills the upper triangle row by row, then mirrors it below the diagonal\n",
  308. "    dm = np.zeros((length, length))\n",
  309. "    rows, cols = np.triu_indices(length, k=1)\n",
  310. "    dm[rows, cols] = vec[:num_dist]\n",
  311. "    return dm + dm.T"
  312. ]
  313. },
  314. {
  315. "cell_type": "markdown",
  316. "metadata": {},
  317. "source": [
  318. "Now, I need to be able to simulate distance matrix vectors."
  319. ]
  320. },
  321. {
  322. "cell_type": "code",
  323. "execution_count": 253,
  324. "metadata": {
  325. "collapsed": true
  326. },
  327. "outputs": [],
  328. "source": [
  329. "num0 = 4\n",
  330. "num1 = 4\n",
  331. "num_d = num0 * num1"
  332. ]
  333. },
  334. {
  335. "cell_type": "code",
  336. "execution_count": 259,
  337. "metadata": {
  338. "collapsed": false
  339. },
  340. "outputs": [
  341. {
  342. "name": "stdout",
  343. "output_type": "stream",
  344. "text": [
  345. "[11 12 13 14 15 16]\n",
  346. "[21 22 23 24 25 26]\n",
  347. "[101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116]\n"
  348. ]
  349. }
  350. ],
  351. "source": [
  352. "vec0 = np.arange(0, num0 * (num0 - 1) // 2) + 11\n",
  353. "vec1 = np.arange(0, num1 * (num1 - 1) // 2) + 21\n",
  354. "veci = np.arange(0, num0 * num1) + 101\n",
  355. "print vec0\n",
  356. "print vec1\n",
  357. "print veci"
  358. ]
  359. },
  360. {
  361. "cell_type": "code",
  362. "execution_count": 328,
  363. "metadata": {
  364. "collapsed": true
  365. },
  366. "outputs": [],
  367. "source": [
  368. "vec0 = np.array([1, 2, 3])\n",
  369. "vec1 = np.array([10, 20, 30])"
  370. ]
  371. },
  372. {
  373. "cell_type": "code",
  374. "execution_count": 329,
  375. "metadata": {
  376. "collapsed": false
  377. },
  378. "outputs": [],
  379. "source": [
  380. "dm0 = convert_to_dm(3, vec0)\n",
  381. "dm1 = convert_to_dm(3, vec1)"
  382. ]
  383. },
  384. {
  385. "cell_type": "code",
  386. "execution_count": 330,
  387. "metadata": {
  388. "collapsed": false
  389. },
  390. "outputs": [
  391. {
  392. "name": "stdout",
  393. "output_type": "stream",
  394. "text": [
  395. "[[ 0. 1. 2.]\n",
  396. " [ 1. 0. 3.]\n",
  397. " [ 2. 3. 0.]]\n",
  398. "[[ 0. 10. 20.]\n",
  399. " [ 10. 0. 30.]\n",
  400. " [ 20. 30. 0.]]\n"
  401. ]
  402. }
  403. ],
  404. "source": [
  405. "print dm0\n",
  406. "print dm1"
  407. ]
  408. },
  409. {
  410. "cell_type": "code",
  411. "execution_count": 331,
  412. "metadata": {
  413. "collapsed": false
  414. },
  415. "outputs": [
  416. {
  417. "data": {
  418. "text/plain": [
  419. "array([[11, 21, 31],\n",
  420. " [12, 22, 32],\n",
  421. " [13, 23, 33]])"
  422. ]
  423. },
  424. "execution_count": 331,
  425. "metadata": {},
  426. "output_type": "execute_result"
  427. }
  428. ],
  429. "source": [
  430. "dmi = np.array([[11, 21, 31],\n",
  431. " [12, 22, 32],\n",
  432. " [13, 23, 33],\n",
  433. " ])\n",
  434. "\n",
  435. "dmi"
  436. ]
  437. },
  438. {
  439. "cell_type": "code",
  440. "execution_count": 336,
  441. "metadata": {
  442. "collapsed": false
  443. },
  444. "outputs": [
  445. {
  446. "data": {
  447. "text/plain": [
  448. "array([[13, 23, 33],\n",
  449. " [12, 22, 32],\n",
  450. " [11, 21, 31]])"
  451. ]
  452. },
  453. "execution_count": 336,
  454. "metadata": {},
  455. "output_type": "execute_result"
  456. }
  457. ],
  458. "source": [
  459. "dmi[::-1]"
  460. ]
  461. },
  462. {
  463. "cell_type": "code",
  464. "execution_count": 332,
  465. "metadata": {
  466. "collapsed": false
  467. },
  468. "outputs": [
  469. {
  470. "data": {
  471. "text/plain": [
  472. "array([[ 0., 1., 2., 11., 21., 31.],\n",
  473. " [ 1., 0., 3., 12., 22., 32.],\n",
  474. " [ 2., 3., 0., 13., 23., 33.]])"
  475. ]
  476. },
  477. "execution_count": 332,
  478. "metadata": {},
  479. "output_type": "execute_result"
  480. }
  481. ],
  482. "source": [
  483. "np.vstack([np.hstack([dm0, dmi]),\n",
  484. " [np.hstack([])]])"
  485. ]
  486. },
  487. {
  488. "cell_type": "code",
  489. "execution_count": null,
  490. "metadata": {
  491. "collapsed": true
  492. },
  493. "outputs": [],
  494. "source": []
  495. },
  496. {
  497. "cell_type": "code",
  498. "execution_count": null,
  499. "metadata": {
  500. "collapsed": true
  501. },
  502. "outputs": [],
  503. "source": []
  504. }
  505. ],
  506. "metadata": {
  507. "kernelspec": {
  508. "display_name": "Python 2",
  509. "language": "python",
  510. "name": "python2"
  511. },
  512. "language_info": {
  513. "codemirror_mode": {
  514. "name": "ipython",
  515. "version": 2
  516. },
  517. "file_extension": ".py",
  518. "mimetype": "text/x-python",
  519. "name": "python",
  520. "nbconvert_exporter": "python",
  521. "pygments_lexer": "ipython2",
  522. "version": "2.7.3"
  523. }
  524. },
  525. "nbformat": 4,
  526. "nbformat_minor": 0
  527. }