PageRenderTime 60ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/Bio/PhyloNetwork.pm

https://bitbucket.org/antismash/clusean
Perl | 1803 lines | 1387 code | 277 blank | 139 comment | 138 complexity | c363c1e009654de9f626d79aa4d3d22c MD5 | raw file
  1. #
  2. # Module for Bio::PhyloNetwork
  3. #
  4. # Please direct questions and support issues to <bioperl-l@bioperl.org>
  5. #
  6. # Cared for by Gabriel Cardona <gabriel(dot)cardona(at)uib(dot)es>
  7. #
  8. # Copyright Gabriel Cardona, Gabriel Valiente
  9. #
  10. # You may distribute this module under the same terms as perl itself
  11. # POD documentation - main docs before the code
  12. =head1 NAME
  13. Bio::PhyloNetwork - Module to compute with Phylogenetic Networks
  14. =head1 SYNOPSIS
  15. use Bio::PhyloNetwork;
  16. # Create a PhyloNetwork object from a eNewick string
  17. my $net1=Bio::PhyloNetwork->new(
  18. -eNewick=>'t0:((H1,(H2,l2)),H2); H1:((H3,l1)); H2:((H3,(l3,H1))); H3:(l4);'
  19. );
  20. # Print all available data
  21. print $net1;
  22. # Rebuild $net1 from its mu_data
  23. my %mudata=$net1->mudata();
  24. my $net2=Bio::PhyloNetwork->new(-mudata=>\%mudata,-numleaves=>4);
  25. print $net2;
  26. print "d=".$net1->mu_distance($net2)."\n";
  27. # Get another one and compute distance
  28. my $net3=Bio::PhyloNetwork->new(
  29. -eNewick=>'(l2,((l1,(H1,l4)),H1))r; (l3)H1;'
  30. );
  31. print "d=".$net1->mu_distance($net3)."\n";
  32. # ...and find an optimal alignment w.r.t. the Manhattan distance (default)
  33. my ($weight,%alignment)=$net1->optimal_alignment($net3);
  34. print "weight:$weight\n";
  35. foreach my $node1 (keys %alignment) {
  36. print "$node1 => ".$alignment{$node1}."\n";
  37. }
  38. # ...or the Hamming distance
  39. my ($weightH,%alignmentH)=$net1->optimal_alignment($net3,-metric=>'Hamming');
  40. print "weight:$weightH\n";
  41. foreach my $node1 (keys %alignmentH) {
  42. print "$node1 => ".$alignmentH{$node1}."\n";
  43. }
  44. # Test for time consistency of $net1
  45. if ($net1->is_time_consistent) {
  46. print "net1 is time consistent\n"
  47. }
  48. else {
  49. print "net1 is not time consistent\n"
  50. }
  51. # create a network from the list of edges
  52. my $net4=Bio::PhyloNetwork->new(-edges=>
  53. [qw(r s r t s u s c t c t v u b u l3 u b v b v l4 b l2 c l1)]);
  54. # Test for time consistency of $net3
  55. if ($net4->is_time_consistent) {
  56. print "net4 is time consistent\n"
  57. }
  58. else {
  59. print "net4 is not time consistent\n"
  60. }
  61. # And print all information on net4
  62. print $net4;
  63. # Compute some tripartitions
  64. my %triparts=$net1->tripartitions();
  65. # Now these are stored
  66. print $net1;
  67. # And can compute the tripartition error
  68. print "dtr=".$net1->tripartition_error($net3)."\n";
  69. =head1 DESCRIPTION
  70. =head2 Phylogenetic Networks
  71. This is a module to work with phylogenetic networks. Phylogenetic networks
  72. have been studied over the last years as a richer model of the evolutionary
  73. history of sets of organisms than phylogenetic trees, because they take not
  74. only mutation events but also recombination and horizontal gene transfer
  75. events into account.
  76. The natural model for describing the evolutionary
  77. history of a set of sequences under recombination events is a DAG, hence
  78. this package relies on the package Graph::Directed to represent the
  79. underlying graph of a phylogenetic network. We refer the reader to [CRV1,CRV2]
  80. for formal definitions related to phylogenetic networks.
  81. =head2 eNewick description
  82. With this package, phylogenetic networks can be given by its eNewick
  83. string. This description appeared in other packages related to
  84. phylogenetic networks (see [PhyloNet] and [NetGen]); in fact, these two
  85. packages use different descriptions. The Bio::PhyloNetwork
  86. package allows both of them, but uses the second one in its output.
  87. The first approach [PhyloNet] goes as follows: For each hybrid node H, say with
  88. parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k+1 different
  89. nodes; let each of the first k copies be a child of one of the u_1,...,u_k
  90. (one for each) and have no children (hence we will have k extra leaves);
  91. as for the last copy, let it have no parents and have v_1,...,v_l be its
  92. children. This way we get a forest; each of the trees will be rooted at either
  93. one root of the phylogenetic network or a hybrid node of it; the set of leaves
  94. (of the whole forest) will be the set of leaves of the original network
  95. together with the set of hybrid nodes (each of them repeated as many times
  96. as its in-degree). Then, the eNewick representation of the phylogenetic network
  97. will be the Newick representation of all the trees in the obtained forest,
  98. each of them with its root labeled.
  99. The second approach [NetGen] goes as follows: For each hybrid node H, say with
  100. parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k different
  101. nodes; let the first copy be a child of u_1 and have all v_1,v_2,...v_l as
  102. its children; let the other copies be child of u_2,...,u_k (one for each)
  103. and have no children. This way, we get a tree whose set of leaves is the
  104. set of leaves of the original network together with the set of hybrid nodes
  105. (possibly repeated). Then the Newick string of the obtained tree (note that
  106. some internal nodes will be labeled and some leaves will be repeated) is
  107. the eNewick string of the phylogenetic network.
  108. For example, consider the network depicted below:
  109. r
  110. / \
  111. / \
  112. U V
  113. / \ / \
  114. 1 \ / 3
  115. H
  116. |
  117. 2
  118. If the first approach is taken, we get the forest:
  119. r
  120. / \
  121. / \
  122. U V
  123. / \ / \
  124. 1 H H 3
  125. |
  126. H
  127. |
  128. 2
  129. Hence, the eNewick string is '((1,H),(H,3))r; (2)H;'.
  130. As for the second one, one gets the tree:
  131. r
  132. / \
  133. / \
  134. U V
  135. / \ / \
  136. 1 H | 3
  137. H
  138. |
  139. 2
  140. Hence, the eNewick string is '((1,H),((2)H,3))r;'.
  141. Note: when rooting a tree, this package allows the notations
  142. '(subtree,subtree,...)root' as well as 'root:(subtree,subtree,...)', but
  143. the first one is used when writing eNewick strings.
  144. =head2 Tree-child phylogenetic networks
  145. Tree-child (TC) phylogenetic networks are a special class of phylogenetic
  146. networks for which a distance, called mu-distance, is defined [CRV2]
  147. based on certain data (mu-data) associated to every node.
  148. Moreover, this distance extends the
  149. Robinson-Foulds on phylogenetic trees. This package allows testing for a
  150. phylogenetic network if it is TC and computes mu-distances between networks
  151. over the same set of leaves.
  152. Moreover, the mu-data allows to define the optimal
  153. (in some precise sense) alignment between networks
  154. over the same set of leaves. This package also computes this optimal alignment.
  155. =head2 Tripartitions
  156. Although tripartitions (see [CRV1] and the references therein) do not allow
  157. to define distances, this package outputs tripartitions and computes a weak
  158. form of the tripartition error.
  159. =head2 Time-consistency
  160. Another useful property of Phylogenetic Networks that appears in the literature
  161. is that of time-consistency or real-time hybrids [BSS]. Roughly speaking, a
  162. network admits a temporal representation if it can be drawn in such a way
  163. that tree arcs (those whose end is a tree node) are inclined downwards, while
  164. hybridization arcs (those whose end is a hybrid node) are horizontal.
  165. This package checks for time-consistency and, if so, a temporal representation
  166. is provided.
  167. =head1 AUTHOR
  168. Gabriel Cardona, gabriel(dot)cardona(at)uib(dot)es
  169. Gabriel Valiente, valiente(at)lsi(dot)upc(dot)edu
  170. =head1 SEE ALSO
  171. =over
  172. =item [CRV1]
  173. G. Cardona, F. Rossello, G. Valiente. Tripartitions do not always
  174. discriminate phylogenetic networks. arXiv:0707.2376v1 [q-bio.PE]
  175. =item [CRV2]
  176. G. Cardona, F. Rossello, G. Valiente. A Distance Measure for
  177. Tree-Child Phylogenetic Networks. Preprint.
  178. =item [NetGen]
  179. M.M. Morin, and B.M.E. Moret. NetGen: generating phylogenetic networks
  180. with diploid hybrids. Bioinformatics 22 (2006), 1921-1923
  181. =item [PhyloNet]
  182. PhyloNet: "Phylogenetic Networks Toolkit".
  183. http://bioinfo.cs.rice.edu/phylonet
  184. =item [BSS]
  185. M. Baroni, C. Semple, and M. Steel. Hybrids in Real
  186. Time. Syst. Biol. 55(1):46-56, 2006
  187. =back
  188. =head1 APPENDIX
  189. The rest of the documentation details each of the object methods.
  190. =cut
  191. package Bio::PhyloNetwork;
  192. use strict;
  193. use warnings;
  194. use base qw(Bio::Root::Root);
  195. use Bio::PhyloNetwork::muVector;
  196. use Graph::Directed;
  197. use Bio::TreeIO;
  198. use Bio::Tree::Node;
  199. use IO::String;
  200. use Array::Compare;
  201. use Algorithm::Munkres;
  202. # Creator
  203. =head2 new
  204. Title : new
  205. Usage : my $obj = new Bio::PhyloNetwork();
  206. Function: Creates a new Bio::PhyloNetwork object
  207. Returns : Bio::PhyloNetwork
  208. Args : none
  209. OR
  210. -eNewick => string
  211. OR
  212. -graph => Graph::Directed object
  213. OR
  214. -edges => reference to an array
  215. OR
  216. -tree => Bio::Tree::Tree object
  217. OR
  218. -mudata => reference to a hash,
  219. -leaves => reference to an array
  220. OR
  221. -mudata => reference to a hash,
  222. -numleaves => integer
  223. Returns a Bio::PhyloNetwork object, created according to the data given:
  224. =over 3
  225. =item new()
  226. creates an empty network.
  227. =item new(-eNewick =E<gt> $str)
  228. creates the network whose
  229. Extended Newick representation (see description above) is the string $str.
  230. =item new(-graph =E<gt> $graph)
  231. creates the network with underlying
  232. graph given by the Graph::Directed object $graph
  233. =item new(-tree =E<gt> $tree)
  234. creates a network as a copy of the
  235. Bio::Tree::Tree object in $tree
  236. =item new(-mudata =E<gt> \%mudata, -leaves =E<gt> \@leaves)
  237. creates the network by reconstructing it from its mu-data stored in
  238. \%mudata and with set of leaves in \@leaves.
  239. =item new(-mudata =E<gt> \%mudata, -numleaves =E<gt> $numleaves)
  240. creates the network by reconstructing it from its mu-data stored in
  241. \%mudata and with set of leaves in ("l1".."l$numleaves").
  242. =back
  243. =cut
  244. sub new {
  245. my ($pkg,@args)=@_;
  246. my $self=$pkg->SUPER::new(@args);
  247. my ($eNewick,$edgesR,$leavesR,$numleaves,$graph,$tree,$mudataR)=
  248. $self->_rearrange([qw(ENEWICK
  249. EDGES
  250. LEAVES
  251. NUMLEAVES
  252. GRAPH
  253. TREE
  254. MUDATA)],@args);
  255. bless($self,$pkg);
  256. $self->build_from_eNewick($eNewick) if defined $eNewick;
  257. $self->build_from_edges(@$edgesR) if defined $edgesR;
  258. $self->build_from_graph($graph) if defined $graph;
  259. $self->build_from_tree($tree) if defined $tree;
  260. if ((! defined $leavesR) && (defined $numleaves)) {
  261. my @leaves=map {"l$_"} (1..$numleaves);
  262. $leavesR=\@leaves;
  263. }
  264. $self->build_from_mudata($mudataR,$leavesR)
  265. if ((defined $mudataR) && (defined $leavesR));
  266. return $self;
  267. }
  268. # Builders
  269. sub build_from_edges {
  270. my ($self,@edges)=@_;
  271. my $graph=Graph::Directed->new();
  272. $graph->add_edges(@edges);
  273. $self->{graph}=$graph;
  274. $self->recompute();
  275. my $labels={};
  276. foreach my $node ($self->nodes()) {
  277. $labels->{$node}=$node;
  278. }
  279. $self->{labels}=$labels;
  280. }
  281. sub build_from_graph {
  282. my ($self,$graph)=@_;
  283. my $graphcp=$graph->copy();
  284. $self->{graph}=$graphcp;
  285. $self->recompute();
  286. my $labels={};
  287. foreach my $node ($self->nodes()) {
  288. $labels->{$node}=$node;
  289. }
  290. $self->{labels}=$labels;
  291. }
  292. my $_eN_index;
  293. sub build_from_eNewick {
  294. my ($self,$string)=@_;
  295. $_eN_index=0;
  296. my $graph=Graph::Directed->new();
  297. my $labels={};
  298. my @blocks=split(/; */,$string);
  299. foreach my $block (@blocks) {
  300. my ($rt,$str)=get_root_and_subtree($block);
  301. my ($rtlbl,$rttype,$rtid,$rtlng)=get_label_type_id_length($rt);
  302. process_block($graph,$labels,$block,$rtid);
  303. $labels->{$rtid}=$rtlbl.'';
  304. }
  305. $self->{graph}=$graph;
  306. $self->{labels}=$labels;
  307. $self->recompute();
  308. }
  309. sub process_block {
  310. my ($graph,$labels,$block,$rtid)=@_;
  311. my ($rt,$str)=get_root_and_subtree($block);
  312. my @substrs=my_split($str);
  313. foreach my $substr (@substrs) {
  314. my ($subrt,$subblock)=get_root_and_subtree($substr);
  315. my ($subrtlbl,$subrttype,$subrtid,$subrtlng)=
  316. get_label_type_id_length($subrt);
  317. if (! $subrtlng eq '') {
  318. $graph->add_weighted_edges($rtid,$subrtid,$subrtlng);
  319. }
  320. else {
  321. $graph->add_edges($rtid,$subrtid);
  322. }
  323. if (! $subrttype eq '') {
  324. $graph->set_edge_attribute($rtid,$subrtid,'type',$subrttype);
  325. }
  326. $subrtlbl.='';
  327. # if (! $subrtlbl eq '') {
  328. if ((! defined $labels->{$subrtid})||($labels->{$subrtid} eq '')){
  329. $labels->{$subrtid}=$subrtlbl;
  330. } elsif (( $labels->{$subrtid} ne $subrtlbl )&&($subrtlbl ne '')) {
  331. # error
  332. die("Different labels for the same node (".$labels->{$subrtid}." and $subrtlbl)");
  333. }
  334. # }
  335. if ($subblock ne "") {
  336. process_block($graph,$labels,$subblock,$subrtid);
  337. }
  338. }
  339. }
  340. sub get_root_and_subtree {
  341. my ($block)=@_;
  342. my ($rt,$str)=("","");
  343. # ($rt,$str)=split(/:|=/,$block);
  344. ($rt,$str)=split(/=/,$block);
  345. if ($rt eq $block) {
  346. # try to look for root label at the end
  347. my $pos=length($rt)-1;
  348. while ((substr($rt,$pos,1) ne ")") && ($pos >=0)) {
  349. $pos--;
  350. }
  351. $rt=substr($block,$pos+1,length($block)-$pos);
  352. $str=substr($block,0,$pos+1);
  353. }
  354. $rt=trim($rt);
  355. $str=trim($str);
  356. return ($rt,$str);
  357. }
  358. sub get_label_type_id_length {
  359. my ($string) = @_;
  360. $string.='';
  361. # print "$string\n";
  362. if (index($string,'#')==-1) {
  363. # no hybrid
  364. my ($label,$length)=split(':',$string);
  365. $label.='';
  366. my $id;
  367. if ((! defined $label) || ($label eq '')) {
  368. # create id
  369. $_eN_index++;
  370. $id="T$_eN_index";
  371. } else {
  372. $id=$label;
  373. }
  374. return ($label,'',$id,$length);
  375. }
  376. else {
  377. # hybrid
  378. my ($label,$string2)=split('#',$string);
  379. my ($typeid,$length)=split(':',$string2);
  380. my $type=$typeid;
  381. $type =~ s/\d//g;
  382. my $id=$typeid;
  383. $id =~ s/\D//g;
  384. return ($label,$type,'#'.$id,$length);
  385. }
  386. }
  387. sub trim
  388. {
  389. my ($string) = @_;
  390. $string =~ s/^\s+//;
  391. $string =~ s/\s+$//;
  392. return $string;
  393. }
  394. sub my_split {
  395. my ( $string ) = @_;
  396. my $temp="";
  397. my @substrings;
  398. my $level=1;
  399. for my $i ( 1 .. length( $string ) ) {
  400. my $char=substr($string,$i,1);
  401. if ($char eq "(") {
  402. $level++;
  403. }
  404. if ($char eq ")") {
  405. if ($level==1) {
  406. push @substrings, $temp;
  407. $temp="";
  408. }
  409. $level--;
  410. }
  411. if (($char eq ",") && ($level==1)) {
  412. push @substrings, $temp;
  413. $temp="";
  414. $char="";
  415. }
  416. $temp = $temp.$char;
  417. }
  418. return @substrings;
  419. }
  420. sub build_from_mudata {
  421. my ($self,$mus,$leavesR)=@_;
  422. my $graph=Graph::Directed->new();
  423. my @nodes=keys %{$mus};
  424. my @leaves=@{$leavesR};
  425. my %seen;
  426. my @internal;
  427. @seen{@leaves} = ();
  428. foreach my $node (@nodes) {
  429. push(@internal, $node) unless exists $seen{$node};
  430. }
  431. @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
  432. @nodes=(@internal,@leaves);
  433. my $numnodes=@nodes;
  434. for (my $i=0;$i<$numnodes;$i++) {
  435. my $mu=$mus->{$nodes[$i]};
  436. my $j=$i+1;
  437. while ($mu->is_positive() && $j<$numnodes) {
  438. if ($mu->geq_poset($mus->{$nodes[$j]})) {
  439. $graph->add_edges(($nodes[$i],$nodes[$j]));
  440. $mu = $mu - $mus->{$nodes[$j]};
  441. }
  442. $j++;
  443. }
  444. }
  445. $self->build_from_graph($graph);
  446. }
  447. # sub relabel_tree {
  448. # my ($tree)=@_;
  449. # my $i=1;
  450. # my $j=1;
  451. # my $root=$tree->get_root_node();
  452. # foreach my $node ($tree->get_nodes()) {
  453. # if ($node == $root) {
  454. # $node->{'_id'}="r";
  455. # }
  456. # elsif (! $node->is_Leaf) {
  457. # $node->{'_id'}="t$i";
  458. # $i++;
  459. # }
  460. # else {
  461. # if ($node->{'_id'} eq "") {
  462. # $node->{'_id'}="l$j";
  463. # $j++;
  464. # }
  465. # }
  466. # }
  467. # return $tree;
  468. # }
  469. # sub build_subtree {
  470. # my ($graph,$root)=@_;
  471. # foreach my $child ($root->each_Descendent) {
  472. # $graph->add_edge($root->id,$child->id);
  473. # $graph=build_subtree($graph,$child);
  474. # }
  475. # return $graph;
  476. # }
  477. sub build_from_tree {
  478. my ($self,$tree)=@_;
  479. # relabel_tree($tree);
  480. # my $treeroot=$tree->get_root_node;
  481. # my $graph=Graph::Directed->new();
  482. # $graph=build_subtree($graph,$treeroot);
  483. # $self->build_from_graph($graph);
  484. my $str;
  485. my $io=IO::String->new($str);
  486. my $treeio=Bio::TreeIO->new(-format => 'newick', -fh => $io);
  487. $treeio->write_tree($tree);
  488. # print "intern: $str\n";
  489. $self->build_from_eNewick($str);
  490. }
  491. sub recompute {
  492. my ($self)=@_;
  493. $self->throw("Graph is not DAG:".$self->{graph})
  494. unless $self->{graph}->is_dag();
  495. my @leaves=$self->{graph}->successorless_vertices();
  496. @leaves=sort @leaves;
  497. my $numleaves=@leaves;
  498. my @roots=$self->{graph}->predecessorless_vertices();
  499. my $numroots=@roots;
  500. #$self->throw("Graph is not rooted") unless ($numroots == 1);
  501. my @nodes=$self->{graph}->vertices();
  502. @nodes=sort @nodes;
  503. my $numnodes=@nodes;
  504. foreach my $node (@nodes) {
  505. if (! defined $self->{labels}->{$node}) {
  506. $self->{labels}->{$node}='';
  507. }
  508. }
  509. $self->{leaves}=\@leaves;
  510. $self->{numleaves}=$numleaves;
  511. $self->{roots}=\@roots;
  512. $self->{numroots}=$numroots;
  513. $self->{nodes}=\@nodes;
  514. $self->{numnodes}=$numnodes;
  515. $self->{mudata}={};
  516. $self->{h}={};
  517. $self->compute_height();
  518. $self->compute_mu();
  519. return $self;
  520. }
  521. # Hybridizing
  522. sub is_attackable {
  523. my ($self,$u1,$v1,$u2,$v2)=@_;
  524. if ( $self->is_hybrid_node($v1) ||
  525. $self->is_hybrid_node($v2) ||
  526. $self->graph->is_reachable($v2,$u1) ||
  527. (($u1 eq $u2)&&($v1 eq $v2)) ||
  528. (! scalar grep {($_ ne $v2) && ($self->is_tree_node($_))}
  529. $self->graph->successors($u2)))
  530. {
  531. return 0;
  532. }
  533. return 1;
  534. }
  535. sub do_attack {
  536. my ($self,$u1,$v1,$u2,$v2,$lbl)=@_;
  537. my $graph=$self->{graph};
  538. $graph->delete_edge($u1,$v1);
  539. $graph->delete_edge($u2,$v2);
  540. $graph->add_edge($u1,"T$lbl");
  541. $graph->add_edge("T$lbl",$v1);
  542. $graph->add_edge($u2,"#H$lbl");
  543. $graph->add_edge("#H$lbl",$v2);
  544. $graph->add_edge("T$lbl","#H$lbl");
  545. $self->build_from_graph($graph);
  546. }
  547. # Computation of mu-data
  548. sub compute_mu {
  549. my ($self)=@_;
  550. my $graph=$self->{graph};
  551. my $mudata=$self->{mudata};
  552. my @leaves=@{$self->{leaves}};
  553. my $numleaves=$self->{numleaves};
  554. for (my $i=0;$i<$numleaves;$i++) {
  555. my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
  556. $vec->[$i]=1;
  557. $mudata->{$leaves[$i]}=$vec;
  558. }
  559. my $h=1;
  560. while (my @nodes=grep {$self->{h}->{$_} == $h} @{$self->{nodes}} )
  561. {
  562. foreach my $u (@nodes) {
  563. my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
  564. foreach my $son ($graph->successors($u)) {
  565. $vec+=$mudata->{$son};
  566. }
  567. $mudata->{$u}=$vec;
  568. }
  569. $h++;
  570. }
  571. }
  572. sub compute_height {
  573. my ($self)=@_;
  574. my $graph=$self->{graph};
  575. my @leaves=@{$self->{leaves}};
  576. foreach my $leaf (@leaves) {
  577. $self->{h}->{$leaf}=0;
  578. }
  579. my $h=0;
  580. while (my @nodes=grep {(defined $self->{h}->{$_})&&($self->{h}->{$_} == $h)}
  581. @{$self->{nodes}} )
  582. {
  583. foreach my $node (@nodes) {
  584. foreach my $parent ($graph->predecessors($node)) {
  585. $self->{h}->{$parent}=$h+1;
  586. }
  587. }
  588. $h++;
  589. }
  590. }
  591. # Tests
  592. =head2 is_leaf
  593. Title : is_leaf
  594. Usage : my $b=$net->is_leaf($u)
  595. Function: tests if $u is a leaf in $net
  596. Returns : boolean
  597. Args : scalar
  598. =cut
  599. sub is_leaf {
  600. my ($self,$node)=@_;
  601. if ($self->{graph}->out_degree($node) == 0) {return 1;}
  602. return 0;
  603. }
  604. =head2 is_root
  605. Title : is_root
  606. Usage : my $b=$net->is_root($u)
  607. Function: tests if $u is the root of $net
  608. Returns : boolean
  609. Args : scalar
  610. =cut
  611. sub is_root {
  612. my ($self,$node)=@_;
  613. if ($self->{graph}->in_degree($node) == 0) {return 1;}
  614. return 0;
  615. }
  616. =head2 is_tree_node
  617. Title : is_tree_node
  618. Usage : my $b=$net->is_tree_node($u)
  619. Function: tests if $u is a tree node in $net
  620. Returns : boolean
  621. Args : scalar
  622. =cut
  623. sub is_tree_node {
  624. my ($self,$node)=@_;
  625. if ($self->{graph}->in_degree($node) <= 1) {return 1;}
  626. return 0;
  627. }
  628. =head2 is_hybrid_node
  629. Title : is_hybrid_node
  630. Usage : my $b=$net->is_hybrid_node($u)
  631. Function: tests if $u is a hybrid node in $net
  632. Returns : boolean
  633. Args : scalar
  634. =cut
  635. sub is_hybrid_node {
  636. my ($self,$node)=@_;
  637. if ($self->{graph}->in_degree($node) > 1) {return 1;}
  638. return 0;
  639. }
  640. sub has_tree_child {
  641. # has_tree_child(g,u) returns 1 if u has a tree child in graph g
  642. # and 0 otherwise
  643. my $g=shift(@_);
  644. my $node=shift(@_);
  645. my @Sons=$g->successors($node);
  646. foreach my $son (@Sons) {
  647. if ($g->in_degree($son)==1) {
  648. return 1;
  649. }
  650. }
  651. return 0;
  652. }
  653. =head2 is_tree_child
  654. Title : is_tree_child
  655. Usage : my $b=$net->is_tree_child()
  656. Function: tests if $net is a Tree-Child phylogenetic network
  657. Returns : boolean
  658. Args : Bio::PhyloNetwork
  659. =cut
  660. sub is_tree_child {
  661. my ($self)=@_;
  662. if (defined $self->{is_tree_child}) {
  663. return $self->{is_tree_child};
  664. }
  665. $self->{is_tree_child}=0;
  666. my $graph=$self->{graph};
  667. foreach my $node (@{$self->{nodes}}) {
  668. return 0 unless ($graph->out_degree($node)==0 ||
  669. has_tree_child($graph,$node));
  670. }
  671. $self->{is_tree_child}=1;
  672. return 1;
  673. }
  674. # Accessors
  675. =head2 nodes
  676. Title : nodes
  677. Usage : my @nodes=$net->nodes()
  678. Function: returns the set of nodes of $net
  679. Returns : array
  680. Args : none
  681. =cut
  682. sub nodes {
  683. my ($self)=@_;
  684. return @{$self->{nodes}};
  685. }
  686. =head2 leaves
  687. Title : leaves
  688. Usage : my @leaves=$net->leaves()
  689. Function: returns the set of leaves of $net
  690. Returns : array
  691. Args : none
  692. =cut
  693. sub leaves {
  694. my ($self)=@_;
  695. return @{$self->{leaves}};
  696. }
  697. =head2 roots
  698. Title : roots
  699. Usage : my @roots=$net->roots()
  700. Function: returns the set of roots of $net
  701. Returns : array
  702. Args : none
  703. =cut
  704. sub roots {
  705. my ($self)=@_;
  706. return @{$self->{roots}};
  707. }
  708. =head2 internal_nodes
  709. Title : internal_nodes
  710. Usage : my @internal_nodes=$net->internal_nodes()
  711. Function: returns the set of internal nodes of $net
  712. Returns : array
  713. Args : none
  714. =cut
  715. sub internal_nodes {
  716. my ($self)=@_;
  717. return grep {! $self->is_leaf($_)} $self->nodes();
  718. }
  719. =head2 tree_nodes
  720. Title : tree_nodes
  721. Usage : my @tree_nodes=$net->tree_nodes()
  722. Function: returns the set of tree nodes of $net
  723. Returns : array
  724. Args : none
  725. =cut
  726. sub tree_nodes {
  727. my ($self)=@_;
  728. return grep {$self->is_tree_node($_)} $self->nodes();
  729. }
  730. =head2 hybrid_nodes
  731. Title : hybrid_nodes
  732. Usage : my @hybrid_nodes=$net->hybrid_nodes()
  733. Function: returns the set of hybrid nodes of $net
  734. Returns : array
  735. Args : none
  736. =cut
  737. sub hybrid_nodes {
  738. my ($self)=@_;
  739. return grep {$self->is_hybrid_node($_)} $self->nodes();
  740. }
  741. =head2 graph
  742. Title : graph
  743. Usage : my $graph=$net->graph()
  744. Function: returns the underlying graph of $net
  745. Returns : Graph::Directed
  746. Args : none
  747. =cut
  748. sub graph {
  749. my ($self)=@_;
  750. return $self->{graph};
  751. }
  752. =head2 edges
  753. Title : edges
  754. Usage : my @edges=$net->edges()
  755. Function: returns the set of edges of $net
  756. Returns : array
  757. Args : none
  758. Each element in the array is an anonimous array whose first element is the
  759. head of the edge and the second one is the tail.
  760. =cut
  761. sub edges {
  762. my ($self)=@_;
  763. return $self->{graph}->edges();
  764. }
  765. =head2 tree_edges
  766. Title : tree_edges
  767. Usage : my @tree_edges=$net->tree_edges()
  768. Function: returns the set of tree edges of $net
  769. (those whose tail is a tree node)
  770. Returns : array
  771. Args : none
  772. =cut
  773. sub tree_edges {
  774. my ($self)=@_;
  775. return grep {$self->is_tree_node($_->[1])} $self->edges();
  776. }
  777. =head2 hybrid_edges
  778. Title : hybrid_edges
  779. Usage : my @hybrid_edges=$net->hybrid_edges()
  780. Function: returns the set of hybrid edges of $net
  781. (those whose tail is a hybrid node)
  782. Returns : array
  783. Args : none
  784. =cut
  785. sub hybrid_edges {
  786. my ($self)=@_;
  787. return grep {$self->is_hybrid_node($_->[1])} $self->edges();
  788. }
  789. =head2 explode
  790. Title : explode
  791. Usage : my @trees=$net->explode()
  792. Function: returns the representation of $net by a set of
  793. Bio::Tree:Tree objects
  794. Returns : array
  795. Args : none
  796. =cut
  797. sub explode {
  798. my ($self)=@_;
  799. my @trees;
  800. $self->explode_rec(\@trees);
  801. return @trees;
  802. }
  803. sub explode_rec {
  804. my ($self,$trees)=@_;
  805. my @h = $self->hybrid_nodes;
  806. if (scalar @h) {
  807. my $v = shift @h;
  808. for my $u ($self->{graph}->predecessors($v)) {
  809. $self->{graph}->delete_edge($u,$v);
  810. $self->explode_rec($trees);
  811. $self->{graph}->add_edge($u,$v);
  812. }
  813. } else {
  814. my $io = IO::String->new($self->eNewick);
  815. my $treeio = Bio::TreeIO->new(-format => 'newick', -fh => $io);
  816. my $tree = $treeio->next_tree;
  817. $tree->contract_linear_paths;
  818. push @{$trees}, $tree;
  819. }
  820. }
  821. =head2 mudata
  822. Title : mudata
  823. Usage : my %mudata=$net->mudata()
  824. Function: returns the representation of $net by its mu-data
  825. Returns : hash
  826. Args : none
  827. $net-E<gt>mudata() returns a hash with keys the nodes of $net and each value is a
  828. muVector object holding its mu-vector.
  829. =cut
  830. sub mudata {
  831. my ($self)=@_;
  832. return %{$self->{mudata}};
  833. }
  834. sub mudata_node {
  835. my ($self,$u)=@_;
  836. return $self->{mudata}{$u};
  837. }
  838. =head2 heights
  839. Title : heights
  840. Usage : my %heights=$net->heights()
  841. Function: returns the heights of the nodes of $net
  842. Returns : hash
  843. Args : none
  844. $net-E<gt>heights() returns a hash with keys the nodes of $net and each value
  845. is its height.
  846. =cut
  847. sub heights {
  848. my ($self)=@_;
  849. return %{$self->{h}};
  850. }
  851. sub height_node {
  852. my ($self,$u)=@_;
  853. return $self->{h}{$u};
  854. }
  855. =head2 mu_distance
  856. Title : mu_distance
  857. Usage : my $dist=$net1->mu_distance($net2)
  858. Function: Computes the mu-distance between the networks $net1 and $net2 on
  859. the same set of leaves
  860. Returns : scalar
  861. Args : Bio::PhyloNetwork
  862. =cut
  863. sub mu_distance {
  864. my ($net1,$net2)=@_;
  865. my @nodes1=$net1->nodes;
  866. my @nodes2=$net2->nodes;
  867. my $comp = Array::Compare->new;
  868. $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
  869. unless $comp->compare($net1->{leaves},$net2->{leaves});
  870. $net1->warn("Not a tree-child phylogenetic network")
  871. unless $net1->is_tree_child();
  872. $net2->warn("Not a tree-child phylogenetic network")
  873. unless $net2->is_tree_child();
  874. my @leaves=@{$net1->{leaves}};
  875. my %matched1;
  876. my %matched2;
  877. OUTER: foreach my $node1 (@nodes1) {
  878. foreach my $node2 (@nodes2) {
  879. if (
  880. (! exists $matched1{$node1}) && (! exists $matched2{$node2}) &&
  881. ($net1->{mudata}{$node1} == $net2->{mudata}{$node2})
  882. ) {
  883. $matched1{$node1}=$node2;
  884. $matched2{$node2}=$node1;
  885. next OUTER;
  886. }
  887. }
  888. }
  889. return (scalar @nodes1)+(scalar @nodes2)-2*(scalar keys %matched1);
  890. }
  891. =head2 mu_distance_generalized
  892. Title : mu_distance_generalized
  893. Usage : my $dist=$net1->mu_distance($net2)
  894. Function: Computes the mu-distance between the topological restrictions of
  895. networks $net1 and $net2 on its common set of leaves
  896. Returns : scalar
  897. Args : Bio::PhyloNetwork
  898. =cut
  899. sub mu_distance_generalized {
  900. my ($net1,$net2)=@_;
  901. my ($netr1,$netr2)=$net1->topological_restriction($net2);
  902. return $netr1->mu_distance($netr2);
  903. }
  904. # mudata_string (code mu_data in a string; useful for isomorphism testing)
  905. sub mudata_string_node {
  906. my ($self,$u)=@_;
  907. return $self->{mudata}->{$u}->display();
  908. }
  909. sub mudata_string {
  910. my ($self)=@_;
  911. return $self->{mudata_string} if defined $self->{mudata_string};
  912. my @internal=$self->internal_nodes;
  913. my $mus=$self->{mudata};
  914. @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
  915. my $str="";
  916. foreach my $node (@internal) {
  917. $str=$str.$self->mudata_string_node($node);
  918. }
  919. $self->{mudata_string}=$str;
  920. return $str;
  921. }
  922. sub is_mu_isomorphic {
  923. my ($net1,$net2)=@_;
  924. return ($net1->mudata_string() eq $net2->mudata_string());
  925. }
  926. # tripartitions
  927. sub compute_tripartition_node {
  928. my ($self,$u)=@_;
  929. $self->warn("Cannot compute tripartitions on unrooted networks. Will assume one at random")
  930. unless ($self->{numroots} == 1);
  931. my $root=$self->{roots}->[0];
  932. my $graph=$self->{graph};
  933. my $graphPruned=$graph->copy();
  934. $graphPruned->delete_vertex($u);
  935. my $tripartition="";
  936. foreach my $leaf (@{$self->{leaves}}) {
  937. my $type;
  938. if ($graph->is_reachable($u,$leaf)) {
  939. if ($graphPruned->is_reachable($root,$leaf)) {$type="B";}
  940. else {$type="A";}
  941. }
  942. else {$type="C";}
  943. $tripartition .= $type;
  944. }
  945. $self->{tripartitions}->{$u}=$tripartition;
  946. }
  947. sub compute_tripartitions {
  948. my ($self)=@_;
  949. foreach my $node (@{$self->{nodes}}) {
  950. $self->compute_tripartition_node($node);
  951. }
  952. }
  953. =head2 tripartitions
  954. Title : tripartitions
  955. Usage : my %tripartitions=$net->tripartitions()
  956. Function: returns the set of tripartitions of $net
  957. Returns : hash
  958. Args : none
  959. $net-E<gt>tripartitions() returns a hash with keys the nodes of $net and each value
  960. is a string representing the tripartition of the leaves induced by the node.
  961. A string "BCA..." associated with a node u (e.g.) means, the first leaf is in
  962. the set B(u), the second one in C(u), the third one in A(u), and so on.
  963. =cut
  964. sub tripartitions {
  965. my ($self)=@_;
  966. $self->compute_tripartitions() unless defined $self->{tripartitions};
  967. return %{$self->{tripartitions}};
  968. }
  969. # to do: change to tri_distance and test for TC and time-cons
  970. sub tripartition_error {
  971. my ($net1,$net2)=@_;
  972. my $comp = Array::Compare->new;
  973. $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
  974. unless $comp->compare($net1->{leaves},$net2->{leaves});
  975. $net1->warn("Not a tree-child phylogenetic network")
  976. unless $net1->is_tree_child();
  977. $net2->warn("Not a tree-child phylogenetic network")
  978. unless $net2->is_tree_child();
  979. $net1->warn("Not a time-consistent network")
  980. unless $net1->is_time_consistent();
  981. $net2->warn("Not a time-consistent network")
  982. unless $net2->is_time_consistent();
  983. $net1->compute_tripartitions() unless defined $net1->{tripartitions};
  984. $net2->compute_tripartitions() unless defined $net2->{tripartitions};
  985. my @edges1=$net1->{graph}->edges();
  986. my @edges2=$net2->{graph}->edges();
  987. my ($FN,$FP)=(0,0);
  988. foreach my $edge1 (@edges1) {
  989. my $matched=0;
  990. foreach my $edge2 (@edges2) {
  991. if ($net1->{tripartitions}->{$edge1->[1]} eq
  992. $net2->{tripartitions}->{$edge2->[1]}) {
  993. $matched=1;
  994. last;
  995. }
  996. }
  997. if (! $matched) {$FN++;}
  998. }
  999. foreach my $edge2 (@edges2) {
  1000. my $matched=0;
  1001. foreach my $edge1 (@edges1) {
  1002. if ($net1->{tripartitions}->{$edge1->[1]} eq
  1003. $net2->{tripartitions}->{$edge2->[1]}) {
  1004. $matched=1;
  1005. last;
  1006. }
  1007. }
  1008. if (! $matched) {$FP++;}
  1009. }
  1010. return ($FN/(scalar @edges1)+$FP/(scalar @edges2))/2;
  1011. }
  1012. # Time-consistency
  1013. # to do: add weak time consistency
  1014. =head2 is_time_consistent
  1015. Title : is_time_consistent
  1016. Usage : my $b=$net->is_time_consistent()
  1017. Function: tests if $net is (strong) time-consistent
  1018. Returns : boolean
  1019. Args : none
  1020. =cut
  1021. sub is_time_consistent {
  1022. my ($self)=@_;
  1023. $self->compute_temporal_representation()
  1024. unless exists $self->{has_temporal_representation};
  1025. return $self->{has_temporal_representation};
  1026. }
  1027. =head2 temporal_representation
  1028. Title : temporal_representation
  1029. Usage : my %time=$net->temporal_representation()
  1030. Function: returns a hash containing a temporal representation of $net, or 0
  1031. if $net is not time-consistent
  1032. Returns : hash
  1033. Args : none
  1034. =cut
  1035. sub temporal_representation {
  1036. my ($self)=@_;
  1037. if ($self->is_time_consistent) {
  1038. return %{$self->{temporal_representation}};
  1039. }
  1040. return 0;
  1041. }
  1042. sub compute_temporal_representation {
  1043. my ($self)=@_;
  1044. my $quotient=Graph::Directed->new();
  1045. my $classes=find_classes($self);
  1046. my %repr;
  1047. map {$repr{$_}=$classes->{$_}[0]} $self->nodes();
  1048. foreach my $e ($self->tree_edges()) {
  1049. $quotient->add_edge($repr{$e->[0]},$repr{$e->[1]});
  1050. }
  1051. my %temp;
  1052. my $depth=0;
  1053. while ($quotient->vertices()) {
  1054. if (my @svs=$quotient->predecessorless_vertices()) {
  1055. foreach my $sv (@svs) {
  1056. $temp{$sv}=$depth;
  1057. }
  1058. $quotient->delete_vertices(@svs);
  1059. } else {
  1060. return 0;
  1061. }
  1062. $depth++;
  1063. }
  1064. foreach my $node (@{$self->{nodes}}) {
  1065. $temp{$node}=$temp{$repr{$node}}
  1066. }
  1067. $self->{temporal_representation}=\%temp;
  1068. $self->{has_temporal_representation}=1;
  1069. }
  1070. sub find_classes {
  1071. my ($self)=@_;
  1072. my $classes={};
  1073. map {$classes->{$_}=[$_]} $self->nodes();
  1074. foreach my $e ($self->hybrid_edges()) {
  1075. $classes=join_classes($classes,$e->[0],$e->[1]);
  1076. }
  1077. return $classes;
  1078. }
  1079. sub join_classes {
  1080. my ($classes,$u,$v)=@_;
  1081. my @clu=@{$classes->{$u}};
  1082. my @clv=@{$classes->{$v}};
  1083. my @cljoin=(@clu,@clv);
  1084. map {$classes->{$_}=\@cljoin} @cljoin;
  1085. return $classes;
  1086. }
  1087. # alignment
  1088. =head2 contract_elementary
  1089. Title : contract_elementary
  1090. Usage : my ($contracted,$blocks)=$net->contract_elementary();
  1091. Function: Returns the network $contracted, obtained by contracting elementary
  1092. paths of $net into edges. The reference $blocks points to a hash
  1093. where, for each node of $contracted, gives the corresponding nodes
  1094. of $net that have been deleted.
  1095. Returns : Bio::PhyloNetwork,reference to hash
  1096. Args : none
  1097. =cut
  1098. sub contract_elementary {
  1099. my ($self)=@_;
  1100. my $contracted=$self->graph->copy();
  1101. my @nodes=$self->nodes();
  1102. my $mus=$self->{mudata};
  1103. my $hs=$self->{h};
  1104. my %blocks;
  1105. foreach my $u (@nodes) {
  1106. $blocks{$u}=[$u];
  1107. }
  1108. my @elementary=grep { $contracted->out_degree($_) == 1} $self->tree_nodes();
  1109. @elementary=sort {$mus->{$b} <=> $mus->{$a} ||
  1110. $hs->{$b} <=> $hs->{$a}} @elementary;
  1111. foreach my $elem (@elementary) {
  1112. my @children=$contracted->successors($elem);
  1113. my $child=$children[0];
  1114. if ($contracted->in_degree($elem) == 1) {
  1115. my @parents=$contracted->predecessors($elem);
  1116. my $parent=$parents[0];
  1117. $contracted->add_edge($parent,$child);
  1118. }
  1119. $contracted->delete_vertex($elem);
  1120. my @blch=@{$blocks{$child}};
  1121. my @blem=@{$blocks{$elem}};
  1122. $blocks{$child}=[@blem,@blch];
  1123. delete $blocks{$elem};
  1124. }
  1125. my $contr=Bio::PhyloNetwork->new(-graph => $contracted);
  1126. return $contr,\%blocks;
  1127. }
  1128. =head2 optimal_alignment
  1129. Title : optimal_alignment
  1130. Usage : my ($weight,$alignment,$wgts)=$net->optimal_alignment($net2)
  1131. Function: returns the total weight of an optimal alignment,
  1132. the alignment itself, and partial weights
  1133. between the networks $net1 and $net2 on the same set of leaves.
  1134. An optional argument allows to use the Manhattan (default) or the
  1135. Hamming distance between mu-vectors.
  1136. Returns : scalar,reference to hash,reference to hash
  1137. Args : Bio::PhyloNetwork,
  1138. -metric => string (optional)
  1139. Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
  1140. =cut
  1141. sub optimal_alignment {
  1142. my ($net1,$net2,%params)=@_;
  1143. my ($net1cont,$blocks1)=contract_elementary($net1);
  1144. my ($net2cont,$blocks2)=contract_elementary($net2);
  1145. my ($wc,$alignc,$weightc)=
  1146. optimal_alignment_noelementary($net1cont,$net2cont,%params);
  1147. my %alignment=();
  1148. my $totalweigth=0;
  1149. my %weigths=();
  1150. foreach my $u1 (keys %$alignc) {
  1151. my $u2=$alignc->{$u1};
  1152. my @block1=@{$blocks1->{$u1}};
  1153. my @block2=@{$blocks2->{$u2}};
  1154. while (@block1 && @block2) {
  1155. my $u1dc=pop @block1;
  1156. my $u2dc=pop @block2;
  1157. $alignment{$u1dc}=$u2dc;
  1158. $weigths{$u1dc}=$weightc->{$u1};
  1159. $totalweigth+=$weigths{$u1dc};
  1160. }
  1161. }
  1162. return $totalweigth,\%alignment,\%weigths;
  1163. }
  1164. sub optimal_alignment_noelementary {
  1165. my ($net1,$net2,%params)=@_;
  1166. my $comp = Array::Compare->new;
  1167. $net1->throw("Cannot align phylogenetic networks on different set of leaves")
  1168. unless $comp->compare($net1->{leaves},$net2->{leaves});
  1169. my $distance;
  1170. if ((defined $params{-metric})and ($params{-metric} eq 'Hamming')) {
  1171. $distance='Hamming';
  1172. } else {
  1173. $distance='Manhattan';
  1174. }
  1175. my $numleaves=$net1->{numleaves};
  1176. my @nodes1=$net1->internal_nodes();
  1177. my @nodes2=$net2->internal_nodes();
  1178. my $numnodes1=@nodes1;
  1179. my $numnodes2=@nodes2;
  1180. my @matrix=();
  1181. for (my $i=0;$i<$numnodes1;$i++) {
  1182. my @row=();
  1183. for (my $j=0;$j<$numnodes2;$j++) {
  1184. push @row,weight($net1,$nodes1[$i],$net2,$nodes2[$j],$distance);
  1185. }
  1186. push @matrix,\@row;
  1187. }
  1188. my @alignment=();
  1189. assign(\@matrix,\@alignment);
  1190. my %alignmenthash;
  1191. my %weighthash;
  1192. my $totalw=0;
  1193. foreach my $leaf (@{$net1->{leaves}}) {
  1194. $alignmenthash{$leaf}=$leaf;
  1195. $weighthash{$leaf}=0;
  1196. }
  1197. for (my $i=0;$i<$numnodes1;$i++) {
  1198. if (defined $nodes2[$alignment[$i]]) {
  1199. $alignmenthash{$nodes1[$i]}=$nodes2[$alignment[$i]];
  1200. $weighthash{$nodes1[$i]}=$matrix[$i][$alignment[$i]];
  1201. $totalw += $matrix[$i][$alignment[$i]];
  1202. }
  1203. }
  1204. return $totalw,\%alignmenthash,\%weighthash;
  1205. }
  1206. =head2 optimal_alignment_generalized
  1207. Title : optimal_alignment_generalized
  1208. Usage : my ($weight,%alignment)=$net->optimal_alignment_generalized($net2)
  1209. Function: returns the wieght of an optimal alignment, and the alignment itself,
  1210. between the topological restriction of the networks $net1 and $net2
  1211. on the set of common leaves.
  1212. An optional argument allows to use the Manhattan (default) or the
  1213. Hamming distance between mu-vectors.
  1214. Returns : scalar,hash
  1215. Args : Bio::PhyloNetwork,
  1216. -metric => string (optional)
  1217. Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
  1218. =cut
  1219. sub optimal_alignment_generalized {
  1220. my ($net1,$net2,%params)=@_;
  1221. my ($netr1,$netr2)=$net1->topological_restriction($net2);
  1222. return $netr1->optimal_alignment($netr2,%params);
  1223. }
  1224. sub weight {
  1225. my ($net1,$v1,$net2,$v2,$distance)=@_;
  1226. my $w;
  1227. if (! defined $distance) {
  1228. $distance='Manhattan';
  1229. }
  1230. if ($distance eq 'Hamming') {
  1231. $w=$net1->{mudata}->{$v1}->hamming($net2->{mudata}->{$v2});
  1232. } else {
  1233. $w=$net1->{mudata}->{$v1}->manhattan($net2->{mudata}->{$v2});
  1234. }
  1235. if (($net1->is_tree_node($v1) && $net2->is_hybrid_node($v2)) ||
  1236. ($net2->is_tree_node($v2) && $net1->is_hybrid_node($v1))
  1237. )
  1238. {
  1239. $w +=1/(2*$net1->{numleaves});
  1240. }
  1241. return $w;
  1242. }
  1243. =head2 topological_restriction
  1244. Title : topological_restriction
  1245. Usage : my ($netr1,$netr2)=$net1->topological_restriction($net2)
  1246. Function: returns the topological restriction of $net1 and $net2 on its
  1247. common set of leaves
  1248. Returns : Bio::PhyloNetwork, Bio::PhyloNetwork
  1249. Args : Bio::PhyloNetwork
  1250. =cut
  1251. sub topological_restriction {
  1252. my ($net1,$net2)=@_;
  1253. my @leaves1=$net1->leaves();
  1254. my @leaves2=$net2->leaves();
  1255. my $numleaves1=scalar @leaves1;
  1256. my $numleaves2=scalar @leaves2;
  1257. my %position1;
  1258. for (my $i=0; $i<$numleaves1; $i++) {
  1259. $position1{$leaves1[$i]}=$i;
  1260. }
  1261. my %position2;
  1262. my @commonleaves=();
  1263. for (my $j=0; $j<$numleaves2; $j++) {
  1264. if (defined $position1{$leaves2[$j]}) {
  1265. push @commonleaves,$leaves2[$j];
  1266. $position2{$leaves2[$j]}=$j;
  1267. }
  1268. }
  1269. my $graphred1=$net1->{graph}->copy();
  1270. my $graphred2=$net2->{graph}->copy();
  1271. OUTER1:
  1272. foreach my $u ($graphred1->vertices()) {
  1273. my $mu=$net1->mudata_node($u);
  1274. foreach my $leaf (@commonleaves) {
  1275. if ($mu->[$position1{$leaf}]>0) {
  1276. next OUTER1;
  1277. }
  1278. }
  1279. $graphred1->delete_vertex($u);
  1280. }
  1281. OUTER2:
  1282. foreach my $u ($graphred2->vertices()) {
  1283. my $mu=$net2->mudata_node($u);
  1284. foreach my $leaf (@commonleaves) {
  1285. if ($mu->[$position2{$leaf}]>0) {
  1286. next OUTER2;
  1287. }
  1288. }
  1289. $graphred2->delete_vertex($u);
  1290. }
  1291. my $netr1=Bio::PhyloNetwork->new(-graph => $graphred1);
  1292. my $netr2=Bio::PhyloNetwork->new(-graph => $graphred2);
  1293. return ($netr1,$netr2);
  1294. }
  1295. # Functions for eNewick representation
  1296. =head2 eNewick
  1297. Title : eNewick
  1298. Usage : my $str=$net->eNewick()
  1299. Function: returns the eNewick representation of $net without labeling
  1300. internal tree nodes
  1301. Returns : string
  1302. Args : none
  1303. =cut
  1304. sub eNewick {
  1305. my ($self)=@_;
  1306. my $str="";
  1307. my $seen={};
  1308. foreach my $root ($self->roots()) {
  1309. $str=$str.$self->eNewick_aux($root,$seen,undef)."; ";
  1310. }
  1311. return $str;
  1312. }
  1313. sub eNewick_aux {
  1314. my ($self,$node,$seen,$parent)=@_;
  1315. my $str='';
  1316. if ($self->is_leaf($node) ||
  1317. (defined $seen->{$node}) )
  1318. {
  1319. $str=make_label($self,$parent,$node);
  1320. }
  1321. else {
  1322. $seen->{$node}=1;
  1323. my @sons=$self->{graph}->successors($node);
  1324. $str="(";
  1325. foreach my $son (@sons) {
  1326. $str=$str.$self->eNewick_aux($son,$seen,$node).",";
  1327. }
  1328. chop($str);
  1329. $str.=")".make_label($self,$parent,$node);
  1330. }
  1331. return $str;
  1332. }
  1333. sub make_label {
  1334. my ($self,$parent,$node)=@_;
  1335. my $str='';
  1336. if ($self->is_hybrid_node($node)) {
  1337. my $lbl=$self->{labels}->{$node};
  1338. if ($lbl =~ /#/) {
  1339. $lbl='';
  1340. }
  1341. $str.=$lbl; #$self->{labels}->{$node};
  1342. $str.='#';
  1343. if ((defined $parent) &&
  1344. ($self->graph->has_edge_attribute($parent,$node,'type'))) {
  1345. $str.=$self->graph->get_edge_attribute($parent,$node,'type');
  1346. }
  1347. $str.=substr $node,1;
  1348. } else {
  1349. $str.=$self->{labels}->{$node};
  1350. }
  1351. if ((defined $parent) &&
  1352. ($self->graph->has_edge_weight($parent,$node))) {
  1353. $str.=":".$self->graph->get_edge_weight($parent,$node);
  1354. }
  1355. return $str;
  1356. }
  1357. =head2 eNewick_full
  1358. Title : eNewick_full
  1359. Usage : my $str=$net->eNewick_full()
  1360. Function: returns the eNewick representation of $net labeling
  1361. internal tree nodes
  1362. Returns : string
  1363. Args : none
  1364. =cut
  1365. sub eNewick_full {
  1366. my ($self)=@_;
  1367. my $str="";
  1368. my $seen={};
  1369. foreach my $root ($self->roots()) {
  1370. $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
  1371. }
  1372. return $str;
  1373. }
  1374. sub eNewick_full_aux {
  1375. my ($self,$node,$seen,$parent)=@_;
  1376. my $str='';
  1377. if ($self->is_leaf($node) ||
  1378. (defined $seen->{$node}) )
  1379. {
  1380. $str=make_label_full($self,$parent,$node);
  1381. }
  1382. else {
  1383. $seen->{$node}=1;
  1384. my @sons=$self->{graph}->successors($node);
  1385. $str="(";
  1386. foreach my $son (@sons) {
  1387. $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
  1388. }
  1389. chop($str);
  1390. $str.=")".make_label_full($self,$parent,$node);
  1391. }
  1392. return $str;
  1393. }
  1394. sub make_label_full {
  1395. my ($self,$parent,$node)=@_;
  1396. my $str='';
  1397. if ($self->is_hybrid_node($node)) {
  1398. my $lbl=$self->{labels}->{$node};
  1399. if ($lbl =~ /#/) {
  1400. $lbl='';
  1401. }
  1402. $str.=$lbl; #$self->{labels}->{$node};
  1403. $str.='#';
  1404. if ((defined $parent) &&
  1405. ($self->graph->has_edge_attribute($parent,$node,'type'))) {
  1406. $str.=$self->graph->get_edge_attribute($parent,$node,'type');
  1407. }
  1408. $str.=substr $node,1;
  1409. } else {
  1410. if ((defined $self->{labels}->{$node})&&($self->{labels}->{$node} ne '')) {
  1411. $str.=$self->{labels}->{$node};
  1412. }
  1413. else {
  1414. $str.=$node;
  1415. }
  1416. }
  1417. if ((defined $parent) &&
  1418. ($self->graph->has_edge_weight($parent,$node))) {
  1419. $str.=":".$self->graph->get_edge_weight($parent,$node);
  1420. }
  1421. return $str;
  1422. }
  1423. # sub eNewick_full {
  1424. # my ($self)=@_;
  1425. # my $str="";
  1426. # my $seen={};
  1427. # foreach my $root ($self->roots()) {
  1428. # $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
  1429. # }
  1430. # return $str;
  1431. # }
  1432. # sub eNewick_full_aux {
  1433. # my ($self,$node,$seen,$parent)=@_;
  1434. # my $str;
  1435. # if ($self->is_leaf($node) ||
  1436. # (defined $seen->{$node}) )
  1437. # {
  1438. # if ($self->is_hybrid_node($node)) {
  1439. # my $tag=substr $node,1;
  1440. # if ((defined $parent) &&
  1441. # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
  1442. # $str='#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
  1443. # } else {
  1444. # $str=$node;
  1445. # }
  1446. # } else {
  1447. # $str=$node;
  1448. # }
  1449. # }
  1450. # else {
  1451. # $seen->{$node}=1;
  1452. # my @sons=$self->{graph}->successors($node);
  1453. # $str="(";
  1454. # foreach my $son (@sons) {
  1455. # $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
  1456. # }
  1457. # chop($str);
  1458. # if ($self->is_hybrid_node($node)) {
  1459. # my $tag=substr $node,1;
  1460. # if ((defined $parent) &&
  1461. # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
  1462. # $str.=')#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
  1463. # } else {
  1464. # $str.=")$node";
  1465. # }
  1466. # } else {
  1467. # $str.=")$node";
  1468. # }
  1469. # }
  1470. # if ((defined $parent) &&
  1471. # ($self->graph->has_edge_weight($parent,$node))) {
  1472. # $str.=":".$self->graph->get_edge_weight($parent,$node);
  1473. # }
  1474. # return $str;
  1475. # }
  1476. # displaying data
  1477. use overload '""' => \&display;
  1478. =head2 display
  1479. Title : display
  1480. Usage : my $str=$net->display()
  1481. Function: returns a string containing all the available information on $net
  1482. Returns : string
  1483. Args : none
  1484. =cut
  1485. sub display {
  1486. my ($self)=@_;
  1487. my $str="";
  1488. my $graph=$self->{graph};
  1489. my @leaves=$self->leaves();
  1490. my @nodes=@{$self->{nodes}};
  1491. $str.= "Leaves:\t@leaves\n";
  1492. $str.= "Nodes:\t@nodes\n";
  1493. $str.= "Graph:\t$graph\n";
  1494. $str.= "eNewick:\t".$self->eNewick()."\n";
  1495. $str.= "Full eNewick:\t".$self->eNewick_full()."\n";
  1496. $str.= "Mu-data and heights:\n";
  1497. foreach my $node (@nodes) {
  1498. $str.= "v=$node: ";
  1499. if (exists $self->{labels}->{$node}) {
  1500. $str.="\tlabel=".$self->{labels}->{$node}.",";
  1501. } else {
  1502. $str.="\tlabel=(none),";
  1503. }
  1504. $str.= "\th=".$self->{h}->{$node}.", \tmu=".$self->{mudata}->{$node}."\n";
  1505. }
  1506. if (exists $self->{has_temporal_representation}) {
  1507. $str.= "Temporal representation:\n";
  1508. if ($self->{has_temporal_representation}) {
  1509. foreach my $node (@nodes) {
  1510. $str.= "v=$node; ";
  1511. $str.= "\tt=".$self->{temporal_representation}->{$node}."\n";
  1512. }
  1513. } else {
  1514. $str.= "Does not exist.\n";
  1515. }
  1516. }
  1517. if (exists $self->{tripartitions}) {
  1518. $str.= "Tripartitions:\n";
  1519. foreach my $node (@nodes) {
  1520. $str.= "v=$node; ";
  1521. $str.= "\ttheta=".$self->{tripartitions}->{$node}."\n";
  1522. }
  1523. }
  1524. return $str;
  1525. }
  1526. 1;