/examples/data/ensembl_pax_sub.fasta

http://github.com/sbotond/phylosim · Unknown · 385 lines · 385 code · 0 blank · 0 comment · 0 complexity · a85b058bef53d25e37d645e781f1bb35 MD5 · raw file

  1. >ENSOGAP00000013678
  2. ------------------------------------------------------------
  3. ------------------------------------------------------------
  4. ------------------------------------------------------------
  5. ------------------------------------------------------------
  6. ------------------------------------------------------------
  7. ------------------------------------------------------------
  8. ------------------------------------------------------------
  9. ------------------------------------------------------------
  10. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  11. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  12. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  13. ------------------------------------------------------------
  14. ------------------------------------------------------------
  15. DNDTVPS------------------------VSSINRIIRTKV-----------------
  16. ----------------------QQPFH---------------------------------
  17. ------------------------------------------------------------
  18. ---------------------------------------------PTPDG-AGTGVTAPG
  19. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  20. GILGI------PRSNGE-------------KRKRDEVEVYTDPAHIRGGGGLHLVWTLRX
  21. X-XXXXX----------------------------XXXXXXXXXX---------------
  22. -------------------------X------XXXXXXXXXX-XXXXX------------
  23. --------------XXXXXX-XXXXXXXXXXXXXX-------XXXXXX--GNE-YS-L--
  24. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  25. -------------------------------------------------------NNVS-
  26. -GTQTYP--------------VVT------------------------------------
  27. ---------------------------------------------------GRDMASTT-
  28. --LPG--------------------------------------------------YPPHV
  29. PP----------------------------------------------------------
  30. --------------------------------------------------------TGQG
  31. S----YPTST-----L-----------AGMVP----------------------------
  32. ------------------GX---------------------------------XXXXNPY
  33. SHP------QYTAYNE-AWRFSNP------------ALL-------------------MP
  34. PPGAPPL-----------------------------PLLPLP----MTATS--YRGDH--
  35. ----IKLQADSFGLHIVPV---------------
  36. >ENSPTRP00000005013
  37. ----------------------MDMHCKADP---FSAM----HP----------------
  38. ------------------------------------------------------------
  39. ------------------------------------------------------------
  40. ------------------------------------------------------------
  41. ------------------------------------------------------------
  42. ------------------------------------------------------------
  43. ------------------------------------------------------------
  44. ------------------------------------------------------------
  45. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  46. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  47. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  48. ------------------------------------------------------------
  49. ------------------------------------------------------------
  50. DNDTVPS------------------------VSSINRIIRTKV-----------------
  51. ----------------------QQPFH---------------------------------
  52. ------------------------------------------------------------
  53. ---------------------------------------------PTPDG-AGTGVTAPG
  54. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  55. GILGI------PRSNGE-------------KRKRDEVEVYTDPAHIRGGGGLHLVWTLRD
  56. V-SEGSV----------------------------PNGDSQSGVD---------------
  57. -------------------------S------LRKHLRADTF-TQQQL------------
  58. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  59. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  60. -------------------------------------------------------SNVS-
  61. -GTQTYP--------------VVT------------------------------------
  62. ---------------------------------------------------GRDMASTT-
  63. --LPG--------------------------------------------------YPPHV
  64. PP----------------------------------------------------------
  65. --------------------------------------------------------TGQG
  66. S----YPTST-----L-----------AGMVP----------------------------
  67. ------------------EA---------------------------------AVGPSSS
  68. --------------------------------------L-------------------MS
  69. KPGRKLA-----------------------------EVPPCV----QPTGA--SSPATRT
  70. ATPSTRPTTRLGDSATPPY---------------
  71. >ENSGGOP00000014550
  72. ----------------------MAGPCCVWGVVFFSCL----SP--A-------------
  73. ------------------------------------------------------------
  74. ------------------------------------------------------------
  75. ------------------------------------------------------------
  76. ------------------------------------------------------------
  77. ------------------------------------------------------------
  78. ------------------------------------------------------------
  79. ------------------------------------------------------------
  80. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  81. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  82. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  83. ------------------------------------------------------------
  84. ------------------------------------------------------------
  85. DNDTVPS------------------------VSSINRIIRTKV-----------------
  86. ----------------------QQPFH---------------------------------
  87. ------------------------------------------------------------
  88. ---------------------------------------------PTPDG-AGTGVTAPG
  89. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  90. GILGI------PRSNGE-------------KRKRDEVEVYTDPAHIRGGGGLHLVWTLRD
  91. V-SEGSV----------------------------PNGDSQSGVD---------------
  92. -------------------------S------LRKHLRADTF-TQQQL------------
  93. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  94. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  95. -------------------------------------------------------SNVS-
  96. -GTQTYP--------------VVT------------------------------------
  97. ---------------------------------------------------GRDMASTT-
  98. --LPG--------------------------------------------------YPPHV
  99. PP----------------------------------------------------------
  100. --------------------------------------------------------TGQG
  101. S----YPTST-----L-----------AGMVP----------------------------
  102. ------------------EA---------------------------------AVGPSSS
  103. --------------------------------------L-------------------MS
  104. KPGRKLA-----------------------------EVPPCV----QPTVC--HGPSTAP
  105. THPSLCP---------------------------
  106. >ENSMMUP00000012017
  107. ----------------------MDMHCKADP---FSAM----HP----------------
  108. ------------------------------------------------------------
  109. ------------------------------------------------------------
  110. ------------------------------------------------------------
  111. ------------------------------------------------------------
  112. ------------------------------------------------------------
  113. ------------------------------------------------------------
  114. ------------------------------------------------------------
  115. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  116. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  117. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  118. ------------------------------------------------------------
  119. ------------------------------------------------------------
  120. DNDTVPS------------------------VSSINRIIRTKV-----------------
  121. ----------------------QQPFH---------------------------------
  122. ------------------------------------------------------------
  123. ---------------------------------------------PTPDG-AGTGVTAPG
  124. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  125. GILGI------PRSNGE-------------KRKRDEVEVYTDPAHIRGGGGLHLVWTLRD
  126. V-SEGSV----------------------------PNGDSQSGVD---------------
  127. -------------------------S------LRKHLRADTF-TQQQL------------
  128. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  129. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  130. -------------------------------------------------------SNVS-
  131. -GTQTYP--------------VVT------------------------------------
  132. ---------------------------------------------------GRDMASTT-
  133. --LPG--------------------------------------------------YPPHV
  134. PP----------------------------------------------------------
  135. --------------------------------------------------------TGQG
  136. S----YPTST-----L-----------AGMVP----------------------------
  137. ------------------GS---------------------------------EFSGNPY
  138. SHP------QYTAYNE-AWRFSNP------------ALL-------------------MP
  139. PPGAPPL-----------------------------PLLPL------------PMTATSY
  140. RGDHIKLQADSFGLHIVPV---------------
  141. >ENSMICP00000001240
  142. ----------------------MDMHCKADP---FSAM----HP----------------
  143. ------------------------------------------------------------
  144. ------------------------------------------------------------
  145. ------------------------------------------------------------
  146. ------------------------------------------------------------
  147. ------------------------------------------------------------
  148. ------------------------------------------------------------
  149. ------------------------------------------------------------
  150. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  151. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  152. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  153. ------------------------------------------------------------
  154. ------------------------------------------------------------
  155. DNDTVPS------------------------VSSINRIIRTKV-----------------
  156. ----------------------QQPFH---------------------------------
  157. ------------------------------------------------------------
  158. ---------------------------------------------PTPDG-AGTGVTAPG
  159. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  160. GILGI------PRSNGE-------------KRKRDEGEVYTDPVHIRGGGGLHLVWTLRX
  161. X-XXXXV----------------------------PNGDSQSGVD---------------
  162. -------------------------S------LRKHLRADTF-TQQQL------------
  163. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  164. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  165. -------------------------------------------------------SNVS-
  166. -GTQTYP--------------VVT------------------------------------
  167. ---------------------------------------------------GRDMASTT-
  168. --LPG--------------------------------------------------YPPHV
  169. PP----------------------------------------------------------
  170. --------------------------------------------------------TGQG
  171. S----YPTST-----L-----------AGMVP----------------------------
  172. ------------------GS---------------------------------EFSGNPY
  173. SHP------QYTAYNE-AWRFSNP------------ALL---------------------
  174. ------------------------------------------------------------
  175. ----------------------------------
  176. >ENSSTOP00000010388
  177. ------------------------------------------------------------
  178. ------------------------------------------------------------
  179. ------------------------------------------------------------
  180. ------------------------------------------------------------
  181. ------------------------------------------------------------
  182. ------------------------------------------------------------
  183. ------------------------------------------------------------
  184. ------------------------------------------------------------
  185. -GHGGV-QLGGVXXXXXXX--------------XXXXXXXXXXXXXX-XXXXXXXXXXXX
  186. ------------------XXXGCVSKILG--------RYYETGSIKPGVI------GGSK
  187. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  188. ------------------------------------------------------------
  189. ------------------------------------------------------------
  190. DNDTVPS------------------------VSSINXXXXXXX-----------------
  191. ----------------------XXXXX---------------------------------
  192. ------------------------------------------------------------
  193. ---------------------------------------------XXXXX-XXXXXXXXX
  194. --XXXXXXXXXXXXX----------------XXX-------------XXXXXX---XXXX
  195. XXXXX------XXXXXX-------------XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  196. X-XXXXX----------------------------XXXXXXXXXX---------------
  197. -------------------------X------XXXXXXXXXX-XXXXX------------
  198. --------------XXXXXX-XXXXXXXXXXXXXX-------XXXXXX--GNE-YS-L--
  199. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  200. -------------------------------------------------------SNVS-
  201. -GTQTYP--------------VVT------------------------------------
  202. ---------------------------------------------------GRDMASTT-
  203. --LPG--------------------------------------------------YPPHV
  204. PP----------------------------------------------------------
  205. --------------------------------------------------------TGQG
  206. S----YPTST-----L-----------AGMVP----------------------------
  207. ------------------GS---------------------------------EFSGNPY
  208. SHP------QYTAYNE-AWRFSNP------------ALL---------------------
  209. ------------------------------------------------------------
  210. ----------------------------------
  211. >ENSCJAP00000032077
  212. ----------------------MDMHCKADP---FSAM----HP----------------
  213. ------------------------------------------------------------
  214. ------------------------------------------------------------
  215. ------------------------------------------------------------
  216. ------------------------------------------------------------
  217. ------------------------------------------------------------
  218. ------------------------------------------------------------
  219. ------------------------------------------------------------
  220. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  221. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  222. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  223. ------------------------------------------------------------
  224. ------------------------------------------------------------
  225. DNDTVPS------------------------VSSINRIIRTKV-----------------
  226. ----------------------QQPFH---------------------------------
  227. ------------------------------------------------------------
  228. ---------------------------------------------PTPDG-AGTGVTAPG
  229. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  230. GILGI------PRSNGE-------------KRKRDEVEVYTDPAHIRGGGGLHLVWTLRD
  231. V-SEGSV----------------------------PNGDSQSGVD---------------
  232. -------------------------S------LRKHLRADTF-TQQQL------------
  233. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  234. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  235. -------------------------------------------------------SNVS-
  236. -GTQTYP--------------VVT------------------------------------
  237. ---------------------------------------------------GRDMASTT-
  238. --LPG--------------------------------------------------YPPHV
  239. PP----------------------------------------------------------
  240. --------------------------------------------------------TGQG
  241. S----YPTST-----L-----------AGMVP----------------------------
  242. ------------------GS---------------------------------EFSGNPY
  243. SHP------QYTAYNE-AWRFSNP------------ALL-------------------SS
  244. PYYYSAA-----------------------------PRGSAP----AAAAA--AYDRH--
  245. ----------------------------------
  246. >ENSDORP00000005799
  247. ----------------------MDMHCKADP---FSAM----HP----------------
  248. ------------------------------------------------------------
  249. ------------------------------------------------------------
  250. ------------------------------------------------------------
  251. ------------------------------------------------------------
  252. ------------------------------------------------------------
  253. ------------------------------------------------------------
  254. ------------------------------------------------------------
  255. -EHGGVNQLGG-FVNGRP---------------PDVV-QRIVELAQQ-GVRPCDISRQLR
  256. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  257. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  258. ------------------------------------------------------------
  259. ------------------------------------------------------------
  260. DNDTVPS------------------------VSSINRIIRTKV-----------------
  261. ----------------------QQPFH---------------------------------
  262. ------------------------------------------------------------
  263. ---------------------------------------------PTPDG-AGTGVTAPG
  264. --HTIVPSNASPPVS----------------SAS-------------NDPEGS---YSIN
  265. GXXXX------XXXXXX-------------XXXXXXXEVYTDPAHIRGGRGLQLVWTLRD
  266. V-SEGSV----------------------------PNGDSQSGVD---------------
  267. -------------------------S------LRKHLRADTF-TQQQL------------
  268. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  269. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  270. -------------------------------------------------------SNVS-
  271. -GTQTYP--------------VVT------------------------------------
  272. ---------------------------------------------------GRDMASTT-
  273. --LPG--------------------------------------------------YPPHV
  274. PP----------------------------------------------------------
  275. --------------------------------------------------------TGQG
  276. S----YPTST-----L-----------AGMVP----------------------------
  277. -----------------------------------------------------GAAVGPS
  278. SS-----------------HMSNP------------GFT-------------------E-
  279. ----------------------------------------VR----MTXXX--XXXXXXX
  280. XXXXXXXXXXXXXXXXXHY---------------
  281. >ENSP00000359319
  282. ----------------------MDMHCKADP---FSAM----HP----------------
  283. ------------------------------------------------------------
  284. ------------------------------------------------------------
  285. ------------------------------------------------------------
  286. ------------------------------------------------------------
  287. ------------------------------------------------------------
  288. ------------------------------------------------------------
  289. ------------------------------------------------------------
  290. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  291. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  292. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  293. ------------------------------------------------------------
  294. ------------------------------------------------------------
  295. DNDTVPS------------------------VSSINRIIRTKV-----------------
  296. ----------------------QQPFH---------------------------------
  297. ------------------------------------------------------------
  298. ---------------------------------------------PTPDG-AGTGVTAPG
  299. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  300. GILGI------PRSNGE-------------KRKRDEVEVYTDPAHIRGGGGLHLVWTLRD
  301. V-SEGSV----------------------------PNGDSQSGVD---------------
  302. -------------------------S------LRKHLRADTF-TQQQL------------
  303. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  304. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  305. -------------------------------------------------------SNVS-
  306. -GTQTYP--------------VVT------------------------------------
  307. ---------------------------------------------------GRDMASTT-
  308. --LPG--------------------------------------------------YPPHV
  309. PP----------------------------------------------------------
  310. --------------------------------------------------------TGQG
  311. S----YPTST-----L-----------AGMVP----------------------------
  312. ------------------EA---------------------------------AVGPSSS
  313. --------------------------------------L-------------------MS
  314. KPGRKLA-----------------------------EVPPCV----QPTGA--SSPATRT
  315. ATPSTRPTTRLGDSATPPY---------------
  316. >ENSCPOP00000000844
  317. ----------------------MDMHCKADP---FSAM----HP----------------
  318. ------------------------------------------------------------
  319. ------------------------------------------------------------
  320. ------------------------------------------------------------
  321. ------------------------------------------------------------
  322. ------------------------------------------------------------
  323. ------------------------------------------------------------
  324. ------------------------------------------------------------
  325. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  326. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  327. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  328. ------------------------------------------------------------
  329. ------------------------------------------------------------
  330. DNDTVPS------------------------VSSINRIIRTKV-----------------
  331. ----------------------QQPFH---------------------------------
  332. ------------------------------------------------------------
  333. ---------------------------------------------PTPDG-TGTGVSAPG
  334. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  335. GILGI------PRSNGE-------------KRKRDE-----------------------D
  336. V-SEGSV----------------------------PNGDSQSGVD---------------
  337. -------------------------S------LRKHLRADTF-TQQQL------------
  338. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  339. PTLTP-G-------LDEVKSGLSAS-TNPELG----------------------------
  340. -------------------------------------------------------SNVS-
  341. -GTQTYP--------------VVT------------------------------------
  342. ---------------------------------------------------GRDMASTT-
  343. --LPG--------------------------------------------------YPPHV
  344. PP----------------------------------------------------------
  345. --------------------------------------------------------TGQG
  346. S----YPTST-----L-----------AGMVP----------------------------
  347. ------------------VP---------------------------------RGCNGP-
  348. ---------------------SSS------------LMN-------------------NS
  349. DRKLAEV-----------------------------PFTLHR----GPSPA--PTPQEYW
  350. PPPVTPPTTRPGNSATPAL---------------
  351. >ENSPPYP00000002986
  352. ----------------------MDMHCKADP---FSAM----HP----------------
  353. ------------------------------------------------------------
  354. ------------------------------------------------------------
  355. ------------------------------------------------------------
  356. ------------------------------------------------------------
  357. ------------------------------------------------------------
  358. ------------------------------------------------------------
  359. ------------------------------------------------------------
  360. -GHGGVNQLGGVFVNGRPL--------------PDVVRQRIVELAHQ-GVRPCDISRQLR
  361. ------------------VSHGCVSKILG--------RYYETGSIKPGVI------GGSK
  362. PK-VATPKVVDKIAEYKRQNPTMFAWEI----------------------RDRLLAEGIC
  363. ------------------------------------------------------------
  364. ------------------------------------------------------------
  365. DNDTVPS------------------------VSSINRIIRTKV-----------------
  366. ----------------------QQPFH---------------------------------
  367. ------------------------------------------------------------
  368. ---------------------------------------------PTPDG-AGTGVTAPG
  369. --HTIVPSTASPPVS----------------SAS-------------NDPVGS---YSIN
  370. GILGI------PRSNGE-------------KRKRDEVEVYTDPAHIRGGGGLHLVWTLRD
  371. V-SEGSV----------------------------PNGDSQSGVD---------------
  372. -------------------------S------LRKHLRADTF-TQQQL------------
  373. --------------EALDRV-FERPSYPDVFQASE-------HIKSEQ--GNE-YS-L--
  374. PALTP-G-------LDEVKSSLSAS-TNPELG----------------------------
  375. -------------------------------------------------------SNVS-
  376. -GTQTYP--------------VVT------------------------------------
  377. ---------------------------------------------------GRDMASTT-
  378. --LPG--------------------------------------------------YPPHV
  379. PP----------------------------------------------------------
  380. --------------------------------------------------------TGQG
  381. S----YPTST-----L-----------AGMVP----------------------------
  382. ------------------EA---------------------------------AVGPSSS
  383. --------------------------------------L-------------------MS
  384. KPGRKLA-----------------------------EVPPCV----QPT-----------
  385. ----------------------------------