/src/grappa/growTree.c

https://code.google.com/p/poy/ · C · 1007 lines · 887 code · 66 blank · 54 comment · 165 complexity · 423ff59a77b256b9da065b7120e2b51c MD5 · raw file

  1. /*
  2. This is the file to define the process of a hill climbing method.
  3. The idea is simple: take the first three genomes, which will create
  4. one tree, then pick the fourth from the remaining list, try to insert
  5. it to one of the three edges, and choose the one with the minimum
  6. score, then go on to the fifth, till the end. However, the considering
  7. is that the ordering of the input is important, so we will try
  8. to find an eliminated graph and reorder the input in order to this
  9. graph, hence there are many functions evolved here.
  10. This file is implemented by Jijun Tang and can be reached at
  11. tangjijun@hotmail.com, please refer to his thesis for detail.
  12. */
  13. #include "structs.h"
  14. #include "labeltree.h"
  15. #include "growTree.h"
  16. #include "circ_order.h"
  17. #include <time.h>
  18. #include <float.h>
  19. extern int init_score_tree ( int COND, struct tNode *tree,
  20. int NUM_GENES, int NUM_GENOMES,
  21. int tspsolver, int thresh,
  22. struct adj_struct *adj_list,
  23. struct adj_struct *adj_pool,
  24. intpair_t * neighbors, int *stack, int *incycle,
  25. int *outcycle, int **weights, int *degree,
  26. int *otherEnd, edge_t * edges,
  27. struct tNode *tpool,
  28. struct genome_struct *labels, int *pred1,
  29. int *pred2, int *picked, int *decode, int *genes,
  30. int CIRCULAR, int distmethod, distmem_t distmem,
  31. int CORRECTION, struct qNode *qpool,
  32. int *edgepool, struct genome_struct *genome_list,
  33. smalledge_t * smalledges, int **status,
  34. triple_t * triple, int inittspsolver, int OPT,
  35. int initmethod );
  36. /* it seemed that I have to init tree, because everything will be changed
  37. when add a new genome into the tree, not only the close three, potentially
  38. every internode may be changed, if to store it is another case (huge
  39. restore , copy and do again*/
  40. int
  41. growTree ( int COND, struct tNode **tree, int NUM_GENES, int NUM_GENOMES,
  42. int tspsolver, int thresh,
  43. struct adj_struct *adj_list, struct adj_struct *adj_pool,
  44. intpair_t * neighbors,
  45. int *stack, int *incycle, int *outcycle, int **weights,
  46. int *degree, int *otherEnd, edge_t * edges,
  47. struct tNode *tpool, struct genome_struct *labels,
  48. int *pred1, int *pred2, int *picked, int *decode,
  49. int *genes, int CIRCULAR,
  50. int distmethod, distmem_t distmem, int CORRECTION,
  51. struct qNode *qpool, int *edgepool,
  52. struct genome_struct *genome_list, smalledge_t * smalledges,
  53. int **status, triple_t * triple, int inittspsolver, int OPT,
  54. int initmethod, bin_env_t * bin_env )
  55. {
  56. int score, mtag, ntag, i;
  57. struct tNode *internal, *newNode, *cutPoint;
  58. /* initial with the first three genomes */
  59. if ( *tree == NULL )
  60. {
  61. init_gen_bin ( 3, bin_env );
  62. printf("In Grow Tree %2d\n",genome_list[2].genes[2]);
  63. *tree = first ( 3, tpool, 0, bin_env );
  64. printf("In Grow Tree %2d\n",genome_list[2].genes[2]);
  65. ( *tree )->leaf = TRUE;
  66. score =
  67. init_score_tree ( COND, *tree, NUM_GENES, 3, tspsolver, thresh,
  68. adj_list, adj_pool, neighbors, stack, incycle,
  69. outcycle, weights, degree, otherEnd, edges,
  70. tpool, labels, pred1, pred2, picked, decode,
  71. genes, CIRCULAR, distmethod, distmem,
  72. CORRECTION, qpool, edgepool, genome_list,
  73. smalledges, status, triple, inittspsolver, OPT,
  74. initmethod );
  75. }
  76. printf("I have init scored \n");
  77. /* add one by one */
  78. for ( i = 4; i <= NUM_GENOMES; i++ )
  79. {
  80. /* find how many leaves and internal nodes in the current tree */
  81. mtag = 0;
  82. ntag = 0;
  83. findNumbers ( *tree, &mtag, &ntag );
  84. /* now create new leaf and internal node, with topology relation */
  85. newNode = &tpool[mtag + ntag];
  86. internal = &tpool[mtag + ntag + 1];
  87. newNode->leaf = TRUE;
  88. newNode->parent = internal;
  89. newNode->lChild = newNode->rChild = NULL;
  90. newNode->tag = mtag + 1;
  91. internal->rChild = newNode;
  92. internal->leaf = FALSE;
  93. internal->tag = ( 0 - ntag - 1 );
  94. /* find the best position to add the new edge, which will return a node */
  95. score = LARGENUM;
  96. cutPoint =
  97. addNewEdge ( COND, ( *tree )->lChild, *tree, internal, newNode,
  98. &score, NUM_GENES, i, tspsolver, thresh, adj_list,
  99. adj_pool, neighbors, stack, incycle, outcycle,
  100. weights, degree, otherEnd, edges, tpool, labels,
  101. pred1, pred2, picked, decode, genes, CIRCULAR,
  102. distmethod, distmem, CORRECTION, qpool, edgepool,
  103. genome_list, smalledges, status, triple,
  104. inittspsolver, OPT, initmethod );
  105. /* the new edge should be added between cutPoint and its parent */
  106. if ( cutPoint->parent->lChild == cutPoint )
  107. {
  108. cutPoint->parent->lChild = internal;
  109. }
  110. else
  111. {
  112. cutPoint->parent->rChild = internal;
  113. }
  114. internal->parent = cutPoint->parent;
  115. cutPoint->parent = internal;
  116. internal->lChild = cutPoint;
  117. ( *tree )->leaf = TRUE;
  118. /* score the current best tree? */
  119. score =
  120. init_score_tree ( COND, *tree, NUM_GENES, i, tspsolver, thresh,
  121. adj_list, adj_pool, neighbors, stack, incycle,
  122. outcycle, weights, degree, otherEnd, edges,
  123. tpool, labels, pred1, pred2, picked, decode,
  124. genes, CIRCULAR, distmethod, distmem,
  125. CORRECTION, qpool, edgepool, genome_list,
  126. smalledges, status, triple, inittspsolver, OPT,
  127. initmethod );
  128. /*print_tree_nexus(*tree); */
  129. }
  130. return score;
  131. }
  132. /* copy everything in node1 to node2*/
  133. void
  134. copyNode ( struct tNode *node1, struct tNode *node2, int num_genes )
  135. {
  136. int i;
  137. /* make copy of the node2's genome ptr */
  138. struct genome_struct *tmp_genome = node2->genome;
  139. /*memcpy everythin */
  140. memcpy ( node2, node1, sizeof ( struct tNode ) );
  141. /* restore genome of node2 */
  142. node2->genome = tmp_genome;
  143. /*copy genes and everthing */
  144. for ( i = 0; i < num_genes; i++ )
  145. {
  146. node2->genome->genes[i] = node1->genome->genes[i];
  147. }
  148. node2->genome->encoding = node1->genome->encoding;
  149. node2->genome->genome_num = node1->genome->genome_num;
  150. node2->genome->gnamePtr = node1->genome->gnamePtr;
  151. return;
  152. }
  153. /* internal->tag internal->rChild=newNode newNode->paren=internal, newNode->l=r=NULL, newNode->leaf = true*/
  154. struct tNode *
  155. addNewEdge ( int COND, struct tNode *tree, struct tNode *root,
  156. struct tNode *internal, struct tNode *newNode, int *score,
  157. int num_genes, int num_genomes, int tspsolver, int thresh,
  158. struct adj_struct *adj_list, struct adj_struct *adj_pool,
  159. intpair_t * neighbors, int *stack, int *incycle, int *outcycle,
  160. int **weights, int *degree, int *otherEnd, edge_t * edges,
  161. struct tNode *tpool, struct genome_struct *labels, int *pred1,
  162. int *pred2, int *picked, int *decode, int *genes, int CIRCULAR,
  163. int distmethod, distmem_t distmem, int CORRECTION,
  164. struct qNode *qpool, int *edgepool,
  165. struct genome_struct *genome_list, smalledge_t * smalledges,
  166. int **status, triple_t * triple, int inittspsolver, int OPT,
  167. int initmethod )
  168. {
  169. int scores[5];
  170. struct tNode *lReturn, *rReturn;
  171. scores[0] = scores[1] = scores[2] = scores[4] = LARGENUM * num_genes;
  172. /* make new topology between the new edge and the tree, that is, add the new edge
  173. into the tree */
  174. if ( tree->parent->lChild == tree )
  175. {
  176. tree->parent->lChild = internal;
  177. }
  178. else
  179. {
  180. tree->parent->rChild = internal;
  181. }
  182. internal->parent = tree->parent;
  183. tree->parent = internal;
  184. internal->lChild = tree;
  185. root->leaf = TRUE;
  186. /* score the new tree */
  187. scores[0] =
  188. init_score_tree ( COND, root, num_genes, num_genomes, tspsolver,
  189. thresh, adj_list, adj_pool, neighbors, stack,
  190. incycle, outcycle, weights, degree, otherEnd, edges,
  191. tpool, labels, pred1, pred2, picked, decode, genes,
  192. CIRCULAR, distmethod, distmem, CORRECTION, qpool,
  193. edgepool, genome_list, smalledges, status, triple,
  194. inittspsolver, OPT, initmethod );
  195. /* restore the tree back to the input status, delete the new edge */
  196. tree->parent = internal->parent;
  197. if ( tree->parent->lChild == internal )
  198. {
  199. tree->parent->lChild = tree;
  200. }
  201. else
  202. {
  203. tree->parent->rChild = tree;
  204. }
  205. /* if the input node is a leaf, return now */
  206. if ( tree->leaf )
  207. {
  208. *score = scores[0];
  209. return tree;
  210. }
  211. /* recursive to child */
  212. root->leaf = TRUE;
  213. lReturn =
  214. addNewEdge ( COND, tree->lChild, root, internal, newNode, &scores[1],
  215. num_genes, num_genomes, tspsolver, thresh, adj_list,
  216. adj_pool, neighbors, stack, incycle, outcycle, weights,
  217. degree, otherEnd, edges, tpool, labels, pred1, pred2,
  218. picked, decode, genes, CIRCULAR, distmethod, distmem,
  219. CORRECTION, qpool, edgepool, genome_list, smalledges,
  220. status, triple, inittspsolver, OPT, initmethod );
  221. rReturn =
  222. addNewEdge ( COND, tree->rChild, root, internal, newNode, &scores[2],
  223. num_genes, num_genomes, tspsolver, thresh, adj_list,
  224. adj_pool, neighbors, stack, incycle, outcycle, weights,
  225. degree, otherEnd, edges, tpool, labels, pred1, pred2,
  226. picked, decode, genes, CIRCULAR, distmethod, distmem,
  227. CORRECTION, qpool, edgepool, genome_list, smalledges,
  228. status, triple, inittspsolver, OPT, initmethod );
  229. /* compare the scores, find the best one, return it */
  230. scores[3] = scores[0];
  231. if ( scores[3] > scores[1] )
  232. scores[3] = scores[1];
  233. if ( scores[3] > scores[2] )
  234. scores[3] = scores[2];
  235. if ( scores[3] == scores[0] )
  236. {
  237. *score = scores[0];
  238. return tree;
  239. }
  240. if ( scores[3] == scores[1] )
  241. {
  242. *score = scores[1];
  243. return tree->lChild;
  244. }
  245. else
  246. {
  247. *score = scores[2];
  248. return tree->rChild;
  249. }
  250. }
  251. /* to find how many internal and leaves*/
  252. void
  253. findNumbers ( struct tNode *tree, int *mtag, int *ntag )
  254. {
  255. if ( tree == NULL )
  256. return;
  257. if ( tree->leaf )
  258. ( *mtag )++;
  259. else
  260. ( *ntag )++;
  261. findNumbers ( tree->lChild, mtag, ntag );
  262. findNumbers ( tree->rChild, mtag, ntag );
  263. return;
  264. }
  265. void
  266. orderGenome ( struct genome_struct *genome_list,
  267. struct genome_struct *cpgenome, int **cpdist, int **distmatrix,
  268. int num_genes, int num_genomes, int threshHold, int start )
  269. {
  270. int i, j;
  271. int *result;
  272. result = ( int * ) malloc ( sizeof ( int ) * num_genomes );
  273. for ( i = 0; i < num_genomes; i++ )
  274. {
  275. cpdist[i] = ( int * ) malloc ( sizeof ( int ) * num_genomes );
  276. for ( j = 0; j < num_genomes; j++ )
  277. {
  278. cpdist[i][j] = distmatrix[i][j];
  279. if ( cpdist[i][j] > threshHold )
  280. cpdist[i][j] = -1;
  281. }
  282. }
  283. eliminateGraph ( cpdist, result, num_genomes, start );
  284. for ( i = 0; i < num_genomes; i++ )
  285. {
  286. printf ( "%d ", result[i] );
  287. cpgenome[i].encoding = genome_list[result[i]].encoding;
  288. for ( j = 0; j < num_genes; j++ )
  289. cpgenome[i].genes[j] = genome_list[result[i]].genes[j];
  290. cpgenome[i].genome_num = i + 1;
  291. cpgenome[i].gnamePtr = genome_list[result[i]].gnamePtr;
  292. /*memcpy(&cpgenome[i], &genome_list[result[i]], sizeof(struct genome_struct)); */
  293. }
  294. free ( result );
  295. }
  296. void
  297. eliminateGraph ( int **dist, int *order, int num, int start )
  298. {
  299. int i, j;
  300. struct graph_vertex **vertexList;
  301. struct vertex_stack *aStack;
  302. int *L;
  303. int x, v, y, maxL;
  304. int test, x1;
  305. L = ( int * ) malloc ( sizeof ( int ) * num );
  306. vertexList =
  307. ( struct graph_vertex ** ) malloc ( sizeof ( struct graph_vertex * ) *
  308. num );
  309. for ( i = 0; i < num; i++ )
  310. {
  311. vertexList[i] = createVertex ( num, i );
  312. L[i] = 0;
  313. }
  314. for ( i = 0; i < num; i++ )
  315. {
  316. for ( j = 0; j < num; j++ )
  317. {
  318. if ( dist[i][j] > 0 && j != vertexList[i]->name )
  319. addNeighbor ( vertexList[j], vertexList[i] );
  320. }
  321. }
  322. aStack = createStack ( );
  323. x = start;
  324. L[x] = -1;
  325. for ( i = num - 1; i >= 0; i-- )
  326. {
  327. v = x;
  328. order[i] = v;
  329. stack_push ( aStack, vertexList[v] );
  330. for ( j = 0; j < num; j++ )
  331. {
  332. if ( neighbor ( vertexList[j], vertexList[v] ) )
  333. {
  334. if ( L[j] >= 0 )
  335. {
  336. L[j] = L[j] + 1;
  337. }
  338. }
  339. }
  340. test = 1;
  341. while ( !stack_empty ( aStack ) && test )
  342. {
  343. y = findOne ( aStack )->name;
  344. x1 = -1;
  345. maxL = -2;
  346. for ( j = 0; j < num; j++ )
  347. {
  348. if ( neighbor ( vertexList[j], vertexList[y] ) )
  349. {
  350. if ( L[j] > maxL )
  351. {
  352. maxL = L[j];
  353. x1 = j;
  354. }
  355. }
  356. }
  357. if ( L[x1] > 0 )
  358. {
  359. x = x1;
  360. L[x1] = -1;
  361. test = 0;
  362. }
  363. else
  364. stack_pop ( aStack );
  365. }
  366. }
  367. free ( L );
  368. for ( i = 0; i < num; i++ )
  369. {
  370. free ( vertexList[i]->neighbors );
  371. free ( vertexList[i] );
  372. }
  373. free ( vertexList );
  374. while ( aStack->head != NULL )
  375. {
  376. stack_pop ( aStack );
  377. }
  378. return;
  379. }
  380. struct graph_vertex *
  381. createVertex ( int num_neighbor, int name )
  382. {
  383. struct graph_vertex *newVertex;
  384. int i;
  385. newVertex =
  386. ( struct graph_vertex * ) malloc ( sizeof ( struct graph_vertex ) );
  387. newVertex->neighbors =
  388. ( struct graph_vertex ** ) malloc ( sizeof ( struct graph_vertex * ) *
  389. ( 1 + num_neighbor ) );
  390. for ( i = 0; i <= num_neighbor; i++ )
  391. {
  392. newVertex->neighbors[i] = NULL;
  393. }
  394. newVertex->name = name;
  395. newVertex->num_neighbors = 0;
  396. return newVertex;
  397. }
  398. /* add v1 to v2's neighbor*/
  399. void
  400. addNeighbor ( struct graph_vertex *v1, struct graph_vertex *v2 )
  401. {
  402. v2->neighbors[v2->num_neighbors] = v1;
  403. v2->num_neighbors++;
  404. }
  405. struct vertex_stack *
  406. createStack ( )
  407. {
  408. struct vertex_stack *newStack;
  409. newStack =
  410. ( struct vertex_stack * ) malloc ( sizeof ( struct vertex_stack ) );
  411. newStack->head = NULL;
  412. return newStack;
  413. }
  414. struct graph_vertex *
  415. stack_pop ( struct vertex_stack *aStack )
  416. {
  417. struct stack_node *tmp;
  418. struct graph_vertex *reVertex;
  419. if ( aStack == NULL || aStack->head == NULL )
  420. return NULL;
  421. tmp = aStack->head;
  422. aStack->head = aStack->head->next;
  423. reVertex = tmp->theVertex;
  424. free ( tmp );
  425. return reVertex;
  426. }
  427. void
  428. stack_push ( struct vertex_stack *aStack, struct graph_vertex *aVertex )
  429. {
  430. struct stack_node *tmp1, *tmp2;
  431. if ( aStack == NULL || aVertex == NULL )
  432. return;
  433. tmp1 = ( struct stack_node * ) malloc ( sizeof ( struct stack_node ) );
  434. tmp1->next = NULL;
  435. tmp1->theVertex = aVertex;
  436. tmp2 = aStack->head;
  437. aStack->head = tmp1;
  438. tmp1->next = tmp2;
  439. return;
  440. }
  441. int
  442. stack_member ( struct vertex_stack *aStack, struct graph_vertex *v )
  443. {
  444. struct stack_node *tmp;
  445. tmp = aStack->head;
  446. while ( tmp != NULL )
  447. {
  448. if ( tmp->theVertex == v )
  449. return 1;
  450. tmp = tmp->next;
  451. }
  452. return 0;
  453. }
  454. int
  455. stack_empty ( struct vertex_stack *aStack )
  456. {
  457. if ( aStack == NULL || aStack->head == NULL )
  458. return 1;
  459. else
  460. return 0;
  461. }
  462. void
  463. stack_del ( struct vertex_stack *aStack, struct graph_vertex *v )
  464. {
  465. struct stack_node *tmp1, *tmp2;
  466. if ( aStack == NULL || aStack->head == NULL )
  467. return;
  468. if ( aStack->head->theVertex == v )
  469. {
  470. stack_pop ( aStack );
  471. return;
  472. }
  473. tmp1 = aStack->head->next;
  474. tmp2 = aStack->head;
  475. while ( tmp1 != NULL )
  476. {
  477. if ( tmp1->theVertex == v )
  478. {
  479. tmp2->next = tmp1->next;
  480. free ( tmp1 );
  481. return;
  482. }
  483. tmp1 = tmp1->next;
  484. }
  485. return;
  486. }
  487. /* is v1 a neighbor of v2*/
  488. int
  489. neighbor ( struct graph_vertex *v1, struct graph_vertex *v2 )
  490. {
  491. int i;
  492. for ( i = 0; i < v2->num_neighbors; i++ )
  493. {
  494. if ( v1 == v2->neighbors[i] )
  495. return 1;
  496. }
  497. return 0;
  498. }
  499. /* remove v1 from v2*/
  500. void
  501. removeNeighbor ( struct graph_vertex *v1, struct graph_vertex *v2 )
  502. {
  503. int i;
  504. for ( i = 0; i < v2->num_neighbors; i++ )
  505. {
  506. if ( v1 == v2->neighbors[i] )
  507. {
  508. v2->neighbors[i] = v2->neighbors[v2->num_neighbors - 1];
  509. v2->neighbors[v2->num_neighbors - 1] = NULL;
  510. v2->num_neighbors--;
  511. }
  512. }
  513. }
  514. struct graph_vertex *
  515. findOne ( struct vertex_stack *aStack )
  516. {
  517. struct stack_node *tmp;
  518. tmp = aStack->head;
  519. return aStack->head->theVertex;
  520. }
  521. void
  522. copyTree ( struct tNode *tree, struct genome_struct *labels, int *cpSC_lChild,
  523. int *cpSC_rChild, int *cpSC_parent, int num_genes )
  524. {
  525. int i;
  526. if ( tree == NULL )
  527. return;
  528. i = tree->tag;
  529. copyGenome ( tree->genome, &labels[i], num_genes );
  530. cpSC_lChild[i] = *tree->sc_lChild;
  531. cpSC_rChild[i] = *tree->sc_rChild;
  532. cpSC_parent[i] = *tree->sc_parent;
  533. if ( tree->lChild != NULL )
  534. copyTree ( tree->lChild, labels, cpSC_lChild, cpSC_rChild,
  535. cpSC_parent, num_genes );
  536. if ( tree->rChild != NULL )
  537. copyTree ( tree->rChild, labels, cpSC_lChild, cpSC_rChild,
  538. cpSC_parent, num_genes );
  539. return;
  540. }
  541. /* copy 1->2*/
  542. void
  543. copyGenome ( struct genome_struct *g1, struct genome_struct *g2,
  544. int num_genes )
  545. {
  546. int i;
  547. for ( i = 0; i < num_genes; i++ )
  548. {
  549. g2->genes[i] = g1->genes[i];
  550. }
  551. return;
  552. }
  553. void
  554. restoreTree ( struct tNode *tree, struct genome_struct *labels,
  555. int *cpSC_lChild, int *cpSC_rChild, int *cpSC_parent,
  556. int num_genes )
  557. {
  558. int i;
  559. if ( tree == NULL )
  560. return;
  561. i = tree->tag;
  562. copyGenome ( &labels[i], tree->genome, num_genes );
  563. *tree->sc_lChild = cpSC_lChild[i];
  564. *tree->sc_rChild = cpSC_rChild[i];
  565. *tree->sc_parent = cpSC_parent[i];
  566. if ( tree->lChild != NULL )
  567. restoreTree ( tree->lChild, labels, cpSC_lChild, cpSC_rChild,
  568. cpSC_parent, num_genes );
  569. if ( tree->rChild != NULL )
  570. restoreTree ( tree->rChild, labels, cpSC_lChild, cpSC_rChild,
  571. cpSC_parent, num_genes );
  572. return;
  573. }
  574. void
  575. createUF ( int n, struct union_set *us )
  576. {
  577. int i;
  578. for ( i = 0; i < n; i++ )
  579. {
  580. us[i].rank = 0;
  581. us[i].parent = i;
  582. }
  583. return;
  584. }
  585. void
  586. unionUF ( int i, int j, struct union_set *us )
  587. {
  588. if ( us[i].rank >= us[j].rank )
  589. {
  590. us[j].parent = i;
  591. if ( us[i].rank == us[j].rank )
  592. {
  593. us[i].rank = us[i].rank + 1;
  594. }
  595. }
  596. else
  597. {
  598. us[i].parent = j;
  599. }
  600. return;
  601. }
  602. int
  603. findUF ( int i, struct union_set *us )
  604. {
  605. int above;
  606. above = us[i].parent;
  607. while ( above != us[above].parent )
  608. {
  609. us[i].parent = us[above].parent;
  610. i = us[above].parent;
  611. above = us[i].parent;
  612. }
  613. return above;
  614. }
  615. int
  616. MST ( int num_genomes, int **distmatrix )
  617. {
  618. int i, j, k, i1, j1;
  619. int *count;
  620. edge_t **sortedges, *newedge, *edges;
  621. struct union_set *us;
  622. int picked, maxweight;
  623. int numedges;
  624. struct adj_struct *node = NULL, *tmp;
  625. struct adj_struct *adj_list;
  626. us = ( struct union_set * ) malloc ( sizeof ( struct union_set ) *
  627. num_genomes );
  628. createUF ( num_genomes, us );
  629. maxweight = -1;
  630. j = 0;
  631. for ( i = 0; i < num_genomes; i++ )
  632. {
  633. for ( j = i + 1; j < num_genomes; j++ )
  634. {
  635. if ( distmatrix[i][j] > maxweight )
  636. maxweight = distmatrix[i][j];
  637. }
  638. }
  639. maxweight += 1;
  640. count = ( int * ) malloc ( sizeof ( int ) * maxweight );
  641. sortedges = ( edge_t ** ) malloc ( sizeof ( edge_t * ) * maxweight );
  642. adj_list =
  643. ( struct adj_struct * ) malloc ( ( num_genomes ) *
  644. sizeof ( struct adj_struct ) );
  645. /* create adj-list */
  646. for ( i = 0; i < num_genomes; i++ )
  647. {
  648. if ( i == num_genomes - 1 )
  649. {
  650. adj_list[i].next = NULL;
  651. }
  652. else
  653. {
  654. adj_list[i].next =
  655. ( struct adj_struct * )
  656. malloc ( sizeof ( struct adj_struct ) );
  657. node = adj_list[i].next;
  658. }
  659. for ( j = i + 1; j < num_genomes; j++ )
  660. {
  661. node->weight = distmatrix[i][j];
  662. node->vertex = j;
  663. if ( j == num_genomes - 1 )
  664. node->next = NULL;
  665. else
  666. {
  667. node->next =
  668. ( struct adj_struct * )
  669. malloc ( sizeof ( struct adj_struct ) );
  670. node = node->next;
  671. }
  672. }
  673. }
  674. /* uses positive/negative indexing and so position 0 in arrays
  675. is always wasted */
  676. /* prepare a sorted list of edges (in increasing order of cost) */
  677. /* use distribution sort -- here ad hoc, since we only have 4 values:
  678. 0, 1, 2, and L = -largevalue */
  679. /* could just use an array of pointers since adj. lists are sorted... */
  680. /* tally values */
  681. for ( i = 0; i < maxweight; i++ )
  682. {
  683. count[i] = 0;
  684. }
  685. for ( i = 0; i < num_genomes; i++ )
  686. {
  687. node = adj_list[i].next;
  688. while ( node != NULL )
  689. {
  690. j = node->vertex;
  691. if ( i != j )
  692. {
  693. count[node->weight]++;
  694. }
  695. node = node->next;
  696. }
  697. }
  698. numedges = 0;
  699. for ( i = 0; i < maxweight; i++ )
  700. {
  701. numedges += count[i];
  702. }
  703. edges = ( edge_t * ) malloc ( sizeof ( edge_t ) * ( numedges + 10 ) );
  704. /* this is a place to merge with circular_ordering.c, to
  705. use the same amount of initialized edge**** together */
  706. sortedges[0] = edges;
  707. for ( i = 1; i < maxweight; i++ )
  708. {
  709. sortedges[i] = sortedges[i - 1] + count[i - 1];
  710. }
  711. for ( i = 0; i < num_genomes; i++ )
  712. {
  713. node = adj_list[i].next;
  714. while ( node != NULL )
  715. {
  716. j = node->vertex;
  717. if ( i != j )
  718. {
  719. k = node->weight;
  720. ( sortedges[k] )->edge1 = node;
  721. ( sortedges[k] )->I = i;
  722. ( sortedges[k] )->J = j;
  723. ( sortedges[k] )++;
  724. }
  725. node = node->next;
  726. }
  727. }
  728. for ( i = 0; i < num_genomes; i++ )
  729. {
  730. node = adj_list[i].next;
  731. while ( node != NULL )
  732. {
  733. tmp = node->next;
  734. free ( node );
  735. node = tmp;
  736. }
  737. }
  738. picked = 0;
  739. newedge = edges;
  740. while ( ( picked < num_genomes - 1 )
  741. && ( newedge < ( edges + numedges ) ) )
  742. {
  743. i = newedge->I;
  744. j = newedge->J;
  745. i1 = findUF ( i, us );
  746. j1 = findUF ( j, us );
  747. if ( i1 != j1 )
  748. {
  749. picked++;
  750. unionUF ( i1, j1, us );
  751. printf ( "%d->%d\n", i, j );
  752. }
  753. newedge++;
  754. }
  755. free ( edges );
  756. free ( adj_list );
  757. free ( count );
  758. free ( sortedges );
  759. return distmatrix[i][j];
  760. }
  761. /* Triangulate the threshold graph: */
  762. void
  763. triangulate ( int **dist, int **origDist, double w, int num )
  764. {
  765. struct graph_vertex **vertexList;
  766. struct graph_vertex **neighborV;
  767. struct vertex_stack *aStack;
  768. struct stack_node *tmp1;
  769. struct graph_vertex *tmpV1;
  770. int count = 0, i, j, k, n1, n2;
  771. double gTriWidth = 100.0;
  772. double max_width = 0;
  773. double min_cost = FLT_MAX;
  774. int min_cost_edges = 0;
  775. int best_v = -1;
  776. int *tax2pos, *pos2tax;
  777. int *intersected, *tmpArray;
  778. double width = 0;
  779. double cost = 0;
  780. int num_edges = 0;
  781. double d_ab;
  782. double over;
  783. pos2tax = ( int * ) malloc ( sizeof ( int ) * num );
  784. tax2pos = ( int * ) malloc ( sizeof ( int ) * num );
  785. intersected = ( int * ) malloc ( sizeof ( int ) * ( num + 2 ) );
  786. tmpArray = ( int * ) malloc ( sizeof ( int ) * ( num + 2 ) );
  787. vertexList =
  788. ( struct graph_vertex ** ) malloc ( sizeof ( struct graph_vertex * ) *
  789. num );
  790. for ( i = 0; i < num; i++ )
  791. {
  792. vertexList[i] = createVertex ( num, i );
  793. }
  794. for ( i = 0; i < num; i++ )
  795. {
  796. for ( j = 0; j < num; j++ )
  797. {
  798. if ( dist[i][j] > 0 && j != vertexList[i]->name )
  799. addNeighbor ( vertexList[j], vertexList[i] );
  800. }
  801. }
  802. aStack = createStack ( );
  803. for ( i = 0; i < num; i++ )
  804. {
  805. stack_push ( aStack, vertexList[i] );
  806. }
  807. for ( i = 0; i < num; i++ )
  808. pos2tax[i] = 0;
  809. while ( !stack_empty ( aStack ) )
  810. {
  811. min_cost = FLT_MAX;
  812. min_cost_edges = 0;
  813. best_v = -1;
  814. tmp1 = aStack->head;
  815. while ( tmp1 != NULL )
  816. {
  817. tmpV1 = tmp1->theVertex;
  818. width = 0;
  819. cost = 0;
  820. num_edges = 0;
  821. intersectNodes ( tmpV1, aStack, num, intersected, tmpArray );
  822. i = 0;
  823. while ( i < num && intersected[i] > 0 )
  824. {
  825. j = i + 1;
  826. while ( j < num && intersected[j] > 0 )
  827. {
  828. if ( neighbor ( vertexList[intersected[i]],
  829. vertexList[intersected[j]] ) )
  830. {
  831. d_ab = dist[intersected[i]][intersected[j]];
  832. num_edges++;
  833. over = d_ab - w;
  834. cost = over;
  835. if ( d_ab > width )
  836. width = d_ab;
  837. if ( cost > min_cost )
  838. goto CONTINUE;
  839. }
  840. j++;
  841. }
  842. i++;
  843. }
  844. if ( cost < min_cost )
  845. {
  846. max_width = width;
  847. min_cost = cost;
  848. min_cost_edges = num_edges;
  849. best_v = tmp1->theVertex->name;
  850. if ( cost == 0 )
  851. break;
  852. }
  853. CONTINUE:;
  854. tmp1 = tmp1->next;
  855. }
  856. pos2tax[count] = best_v;
  857. tax2pos[best_v] = count;
  858. count++;
  859. if ( min_cost )
  860. {
  861. if ( max_width > gTriWidth )
  862. gTriWidth = max_width;
  863. neighborV = vertexList[best_v]->neighbors;
  864. i = vertexList[best_v]->num_neighbors;
  865. j = 0;
  866. while ( j < i )
  867. {
  868. if ( stack_member ( aStack, neighborV[j] ) )
  869. {
  870. k = j + 1;
  871. while ( k < i )
  872. {
  873. if ( stack_member ( aStack, neighborV[k] )
  874. && !neighbor ( neighborV[j], neighborV[k] ) )
  875. {
  876. n1 = neighborV[j]->name;
  877. n2 = neighborV[k]->name;
  878. dist[n1][n2] = origDist[n1][n2];
  879. addNeighbor ( vertexList[n1], vertexList[n2] );
  880. addNeighbor ( vertexList[n2], vertexList[n1] );
  881. min_cost_edges--;
  882. if ( min_cost_edges == 0 )
  883. goto OUT;
  884. }
  885. k++;
  886. }
  887. }
  888. j++;
  889. }
  890. }
  891. OUT:;
  892. stack_del ( aStack, vertexList[best_v] );
  893. }
  894. for ( i = 0; i < num; i++ )
  895. {
  896. free ( vertexList[i]->neighbors );
  897. free ( vertexList[i] );
  898. }
  899. free ( vertexList );
  900. while ( aStack->head != NULL )
  901. {
  902. stack_pop ( aStack );
  903. }
  904. free ( intersected );
  905. free ( tmpArray );
  906. free ( tax2pos );
  907. free ( pos2tax );
  908. }
  909. void
  910. intersectNodes ( struct graph_vertex *v, struct vertex_stack *aStack,
  911. int num, int *intersected, int *tmpArray )
  912. {
  913. int i, numN;
  914. struct stack_node *tmp;
  915. numN = v->num_neighbors;
  916. for ( i = 0; i < num; i++ )
  917. {
  918. tmpArray[i] = 0;
  919. }
  920. for ( i = 0; i < numN; i++ )
  921. {
  922. tmpArray[v->neighbors[i]->name] = 1;
  923. }
  924. tmp = aStack->head;
  925. while ( tmp != NULL )
  926. {
  927. tmpArray[tmp->theVertex->name]++;
  928. tmp = tmp->next;
  929. }
  930. numN = 0;
  931. for ( i = 0; i < num; i++ )
  932. {
  933. if ( tmpArray[i] == 2 )
  934. {
  935. intersected[numN] = i;
  936. numN++;
  937. }
  938. }
  939. intersected[numN] = -1;
  940. return;
  941. }