PageRenderTime 53ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/dgpu-ep-htsr-a/SOAPdenovo/src/127mer/splitReps.c

http://dgpu-ep-htsr-a.googlecode.com/
C | 440 lines | 316 code | 78 blank | 46 comment | 70 complexity | b164bd7fe99dced5263dc18a43a0d299 MD5 | raw file
Possible License(s): GPL-3.0
  1. /*
  2. * 127mer/splitReps.c
  3. *
  4. * Copyright (c) 2008-2010 BGI-Shenzhen <soap at genomics dot org dot cn>.
  5. *
  6. * This file is part of SOAPdenovo.
  7. *
  8. * SOAPdenovo is free software: you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation, either version 3 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * SOAPdenovo is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
  20. *
  21. */
  22. #include "stdinc.h"
  23. #include "newhash.h"
  24. #include "extfunc.h"
  25. #include "extvab.h"
  26. static unsigned int involved[9];
  27. static unsigned int lefts[4];
  28. static unsigned int rights[4];
  29. static unsigned char gothrough[4][4];
  30. static boolean interferingCheck ( unsigned int edgeno, int repTimes )
  31. {
  32. int i, j, t;
  33. unsigned int bal_ed;
  34. involved[0] = edgeno;
  35. i = 1;
  36. for ( j = 0; j < repTimes; j++ )
  37. { involved[i++] = lefts[j]; }
  38. for ( j = 0; j < repTimes; j++ )
  39. { involved[i++] = rights[j]; }
  40. for ( j = 0; j < i - 1; j++ )
  41. for ( t = j + 1; t < i; t++ )
  42. if ( involved[j] == involved[t] )
  43. { return 1; }
  44. for ( j = 0; j < i; j++ )
  45. {
  46. bal_ed = getTwinEdge ( involved[j] );
  47. for ( t = 0; t < i; t++ )
  48. if ( bal_ed == involved[t] )
  49. { return 1; }
  50. }
  51. return 0;
  52. }
  53. static ARC * arcCounts ( unsigned int edgeid, unsigned int * num )
  54. {
  55. ARC * arc;
  56. ARC * firstValidArc = NULL;
  57. unsigned int count = 0;
  58. arc = edge_array[edgeid].arcs;
  59. while ( arc )
  60. {
  61. if ( arc->to_ed > 0 )
  62. { count++; }
  63. if ( count == 1 )
  64. { firstValidArc = arc; }
  65. arc = arc->next;
  66. }
  67. *num = count;
  68. return firstValidArc;
  69. }
  70. static boolean readOnEdge ( long long readid, unsigned int edge )
  71. {
  72. int index;
  73. int markNum;
  74. long long * marklist;
  75. if ( edge_array[edge].markers )
  76. {
  77. markNum = edge_array[edge].multi;
  78. marklist = edge_array[edge].markers;
  79. }
  80. else
  81. { return 0; }
  82. for ( index = 0; index < markNum; index++ )
  83. {
  84. if ( readid == marklist[index] )
  85. { return 1; }
  86. }
  87. return 0;
  88. }
  89. static long long cntByReads ( unsigned int left, unsigned int middle
  90. , unsigned int right )
  91. {
  92. int markNum;
  93. long long * marklist;
  94. if ( edge_array[left].markers )
  95. {
  96. markNum = edge_array[left].multi;
  97. marklist = edge_array[left].markers;
  98. }
  99. else
  100. { return 0; }
  101. int index;
  102. long long readid;
  103. /*
  104. if(middle==8553)
  105. printf("%d markers on %d\n",markNum,left);
  106. */
  107. for ( index = 0; index < markNum; index++ )
  108. {
  109. readid = marklist[index];
  110. if ( readOnEdge ( readid, middle )
  111. && readOnEdge ( readid, right ) )
  112. {
  113. return readid;
  114. }
  115. }
  116. return 0;
  117. }
  118. /*
  119. - -
  120. > - <
  121. - -
  122. */
  123. unsigned int solvable ( unsigned int edgeno )
  124. {
  125. if ( EdSameAsTwin ( edgeno ) || edge_array[edgeno].multi == 255 )
  126. { return 0; }
  127. unsigned int bal_ed = getTwinEdge ( edgeno );
  128. unsigned int arcRight_n, arcLeft_n;
  129. unsigned int counter;
  130. unsigned int i, j;
  131. unsigned int branch, bal_branch;
  132. ARC * parcL, *parcR;
  133. parcL = arcCounts ( bal_ed, &arcLeft_n );
  134. if ( arcLeft_n < 2 )
  135. { return 0; }
  136. parcR = arcCounts ( edgeno, &arcRight_n );
  137. if ( arcLeft_n != arcRight_n )
  138. { return 0; }
  139. // check each right branch only has one upsteam connection
  140. /*
  141. if(edgeno==2551){
  142. for(i=0;i<arcLeft_n;i++)
  143. printf("%d,",lefts[i]);
  144. printf("__left to %d\n",edgeno);
  145. for(j=0;j<arcRight_n;j++)
  146. printf("%d,",rights[j]);
  147. printf("__right to %d\n",edgeno);
  148. }
  149. */
  150. arcRight_n = 0;
  151. while ( parcR )
  152. {
  153. if ( parcR->to_ed == 0 )
  154. {
  155. parcR = parcR->next;
  156. continue;
  157. }
  158. branch = parcR->to_ed;
  159. if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 )
  160. {
  161. return 0;
  162. }
  163. rights[arcRight_n++] = branch;
  164. bal_branch = getTwinEdge ( branch );
  165. arcCounts ( bal_branch, &counter );
  166. if ( counter != 1 )
  167. {
  168. return 0;
  169. }
  170. parcR = parcR->next;
  171. }
  172. // check if each left branch only has one downsteam connection
  173. arcLeft_n = 0;
  174. while ( parcL )
  175. {
  176. if ( parcL->to_ed == 0 )
  177. {
  178. parcL = parcL->next;
  179. continue;
  180. }
  181. branch = parcL->to_ed;
  182. if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 )
  183. { return 0; }
  184. bal_branch = getTwinEdge ( branch );
  185. lefts[arcLeft_n++] = bal_branch;
  186. arcCounts ( bal_branch, &counter );
  187. if ( counter != 1 )
  188. { return 0; }
  189. parcL = parcL->next;
  190. }
  191. //check if reads indicate one to one connection between upsteam and downstream edges
  192. for ( i = 0; i < arcLeft_n; i++ )
  193. {
  194. counter = 0;
  195. for ( j = 0; j < arcRight_n; j++ )
  196. {
  197. gothrough[i][j] = cntByReads ( lefts[i], edgeno, rights[j] ) == 0 ? 0 : 1;
  198. counter += gothrough[i][j];
  199. if ( counter > 1 )
  200. { return 0; }
  201. }
  202. if ( counter != 1 )
  203. { return 0; }
  204. }
  205. for ( j = 0; j < arcRight_n; j++ )
  206. {
  207. counter = 0;
  208. for ( i = 0; i < arcLeft_n; i++ )
  209. { counter += gothrough[i][j]; }
  210. if ( counter != 1 )
  211. { return 0; }
  212. }
  213. return arcLeft_n;
  214. }
  215. static unsigned int cp1edge ( unsigned int source, unsigned int target )
  216. {
  217. int length = edge_array[source].length;
  218. char * tightSeq;
  219. int index;
  220. unsigned int bal_source = getTwinEdge ( source );
  221. unsigned int bal_target;
  222. if ( bal_source > source )
  223. { bal_target = target + 1; }
  224. else
  225. {
  226. bal_target = target;
  227. target = target + 1;
  228. }
  229. tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
  230. for ( index = 0; index < length / 4 + 1; index++ )
  231. { tightSeq[index] = edge_array[source].seq[index]; }
  232. edge_array[target].length = length;
  233. edge_array[target].cvg = edge_array[source].cvg;
  234. edge_array[target].to_vt = edge_array[source].to_vt;
  235. edge_array[target].from_vt = edge_array[source].from_vt;
  236. edge_array[target].seq = tightSeq;
  237. edge_array[target].bal_edge = edge_array[source].bal_edge;
  238. edge_array[target].rv = NULL;
  239. edge_array[target].arcs = NULL;
  240. edge_array[target].markers = NULL;
  241. edge_array[target].flag = 0;
  242. edge_array[target].deleted = 0;
  243. tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
  244. for ( index = 0; index < length / 4 + 1; index++ )
  245. { tightSeq[index] = edge_array[bal_source].seq[index]; }
  246. edge_array[bal_target].length = length;
  247. edge_array[bal_target].cvg = edge_array[bal_source].cvg;
  248. edge_array[bal_target].to_vt = edge_array[bal_source].to_vt;
  249. edge_array[bal_target].from_vt = edge_array[bal_source].from_vt;
  250. edge_array[bal_target].seq = tightSeq;
  251. edge_array[bal_target].bal_edge = edge_array[bal_source].bal_edge;
  252. edge_array[bal_target].rv = NULL;
  253. edge_array[bal_target].arcs = NULL;
  254. edge_array[bal_target].markers = NULL;
  255. edge_array[bal_target].flag = 0;
  256. edge_array[bal_target].deleted = 0;
  257. return target;
  258. }
  259. static void moveArc2cp ( unsigned int leftEd, unsigned int rightEd,
  260. unsigned int source, unsigned int target )
  261. {
  262. unsigned int bal_left = getTwinEdge ( leftEd );
  263. unsigned int bal_right = getTwinEdge ( rightEd );
  264. unsigned int bal_source = getTwinEdge ( source );
  265. unsigned int bal_target = getTwinEdge ( target );
  266. ARC * arc;
  267. ARC * newArc, *twinArc;
  268. //between left and source
  269. arc = getArcBetween ( leftEd, source );
  270. arc->to_ed = 0;
  271. newArc = allocateArc ( target );
  272. newArc->multiplicity = arc->multiplicity;
  273. newArc->prev = NULL;
  274. newArc->next = edge_array[leftEd].arcs;
  275. if ( edge_array[leftEd].arcs )
  276. { edge_array[leftEd].arcs->prev = newArc; }
  277. edge_array[leftEd].arcs = newArc;
  278. arc = getArcBetween ( bal_source, bal_left );
  279. arc->to_ed = 0;
  280. twinArc = allocateArc ( bal_left );
  281. twinArc->multiplicity = arc->multiplicity;
  282. twinArc->prev = NULL;
  283. twinArc->next = NULL;
  284. edge_array[bal_target].arcs = twinArc;
  285. newArc->bal_arc = twinArc;
  286. twinArc->bal_arc = newArc;
  287. //between source and right
  288. arc = getArcBetween ( source, rightEd );
  289. arc->to_ed = 0;
  290. newArc = allocateArc ( rightEd );
  291. newArc->multiplicity = arc->multiplicity;
  292. newArc->prev = NULL;
  293. newArc->next = NULL;
  294. edge_array[target].arcs = newArc;
  295. arc = getArcBetween ( bal_right, bal_source );
  296. arc->to_ed = 0;
  297. twinArc = allocateArc ( bal_target );
  298. twinArc->multiplicity = arc->multiplicity;
  299. twinArc->prev = NULL;
  300. twinArc->next = edge_array[bal_right].arcs;
  301. if ( edge_array[bal_right].arcs )
  302. { edge_array[bal_right].arcs->prev = twinArc; }
  303. edge_array[bal_right].arcs = twinArc;
  304. newArc->bal_arc = twinArc;
  305. twinArc->bal_arc = newArc;
  306. }
  307. static void split1edge ( unsigned int edgeno, int repTimes )
  308. {
  309. int i, j;
  310. unsigned int target;
  311. for ( i = 1; i < repTimes; i++ )
  312. {
  313. for ( j = 0; j < repTimes; j++ )
  314. if ( gothrough[i][j] > 0 )
  315. { break; }
  316. target = cp1edge ( edgeno, extraEdgeNum );
  317. moveArc2cp ( lefts[i], rights[j], edgeno, target );
  318. extraEdgeNum += 2;
  319. }
  320. }
  321. static void debugging ( unsigned int i )
  322. {
  323. ARC * parc;
  324. parc = edge_array[i].arcs;
  325. if ( !parc )
  326. { printf ( "no downward connection for %d\n", i ); }
  327. while ( parc )
  328. {
  329. printf ( "%d -> %d\n", i, parc->to_ed );
  330. parc = parc->next;
  331. }
  332. }
  333. void solveReps()
  334. {
  335. unsigned int i;
  336. unsigned int repTime;
  337. int counter = 0;
  338. boolean flag;
  339. //debugging(30514);
  340. extraEdgeNum = num_ed + 1;
  341. for ( i = 1; i <= num_ed; i++ )
  342. {
  343. repTime = solvable ( i );
  344. if ( repTime == 0 )
  345. { continue; }
  346. flag = interferingCheck ( i, repTime );
  347. if ( flag )
  348. { continue; }
  349. split1edge ( i, repTime );
  350. counter ++; //+= 2*(repTime-1);
  351. if ( EdSmallerThanTwin ( i ) )
  352. { i++; }
  353. }
  354. printf ( "%d repeats solvable, %d more edges\n", counter, extraEdgeNum - 1 - num_ed );
  355. num_ed = extraEdgeNum - 1;
  356. removeDeadArcs();
  357. if ( markersArray )
  358. {
  359. free ( ( void * ) markersArray );
  360. markersArray = NULL;
  361. }
  362. }