PageRenderTime 49ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/perllib/Bio/Tree/Statistics.pm

https://bitbucket.org/hirenj/snp-server
Perl | 838 lines | 681 code | 139 blank | 18 comment | 51 complexity | f0e3dffd1467e8fc567a8e951300a6dd MD5 | raw file
  1. # $Id: Statistics.pm 16123 2009-09-17 12:57:27Z cjfields $
  2. #
  3. # BioPerl module for Bio::Tree::Statistics
  4. #
  5. # Please direct questions and support issues to <bioperl-l@bioperl.org>
  6. #
  7. # Cared for by Jason Stajich <jason@bioperl.org>
  8. #
  9. # Copyright Jason Stajich
  10. #
  11. # You may distribute this module under the same terms as perl itself
  12. # POD documentation - main docs before the code
  13. =head1 NAME
  14. Bio::Tree::Statistics - Calculate certain statistics for a Tree
  15. =head1 SYNOPSIS
  16. use Bio::Tree::Statistics;
  17. =head1 DESCRIPTION
  18. This should be where Tree statistics are calculated. It was
  19. previously where statistics from a Coalescent simulation.
  20. It now contains several methods for calculating L<Tree-Trait
  21. statistics>.
  22. =head1 FEEDBACK
  23. =head2 Mailing Lists
  24. User feedback is an integral part of the evolution of this and other
  25. Bioperl modules. Send your comments and suggestions preferably to
  26. the Bioperl mailing list. Your participation is much appreciated.
  27. bioperl-l@bioperl.org - General discussion
  28. http://bioperl.org/wiki/Mailing_lists - About the mailing lists
  29. =head2 Support
  30. Please direct usage questions or support issues to the mailing list:
  31. I<bioperl-l@bioperl.org>
  32. rather than to the module maintainer directly. Many experienced and
  33. reponsive experts will be able look at the problem and quickly
  34. address it. Please include a thorough description of the problem
  35. with code and data examples if at all possible.
  36. =head2 Reporting Bugs
  37. Report bugs to the Bioperl bug tracking system to help us keep track
  38. of the bugs and their resolution. Bug reports can be submitted via
  39. the web:
  40. http://bugzilla.open-bio.org/
  41. =head1 AUTHOR - Jason Stajich
  42. Email jason AT bioperl.org
  43. =head1 CONTRIBUTORS
  44. Heikki Lehvaslaiho, heikki at bioperl dot org
  45. =head1 APPENDIX
  46. The rest of the documentation details each of the object methods.
  47. Internal methods are usually preceded with a _
  48. =cut
  49. # Let the code begin...
  50. package Bio::Tree::Statistics;
  51. use strict;
  52. use base qw(Bio::Root::Root);
  53. =head2 new
  54. Title : new
  55. Usage : my $obj = Bio::Tree::Statistics->new();
  56. Function: Builds a new Bio::Tree::Statistics object
  57. Returns : Bio::Tree::Statistics
  58. Args :
  59. =cut
  60. =head2 assess_bootstrap
  61. Title : assess_bootstrap
  62. Usage : my $tree_with_bs = $stats->assess_bootstrap(\@bs_trees);
  63. Function: Calculates the bootstrap for internal nodes based on
  64. Returns : L<Bio::Tree::TreeI>
  65. Args : Arrayref of L<Bio::Tree::TreeI>s
  66. =cut
  67. sub assess_bootstrap{
  68. my ($self,$bs_trees,$guide_tree) = @_;
  69. my @consensus;
  70. # internal nodes are defined by their children
  71. my (%lookup,%internal);
  72. my $i = 0;
  73. for my $tree ( $guide_tree, @$bs_trees ) {
  74. # Do this as a top down approach, can probably be
  75. # improved by caching internal node states, but not going
  76. # to worry about it right now.
  77. my @allnodes = $tree->get_nodes;
  78. my @internalnodes = grep { ! $_->is_Leaf } @allnodes;
  79. for my $node ( @internalnodes ) {
  80. my @tips = sort map { $_->id }
  81. grep { $_->is_Leaf() } $node->get_all_Descendents;
  82. my $id = "(".join(",", @tips).")";
  83. if( $i == 0 ) {
  84. $internal{$id} = $node->internal_id;
  85. } else {
  86. $lookup{$id}++;
  87. }
  88. }
  89. $i++;
  90. }
  91. my @save;
  92. for my $l ( keys %lookup ) {
  93. if( defined $internal{$l} ) {#&& $lookup{$l} > $min_seen ) {
  94. my $intnode = $guide_tree->find_node(-internal_id => $internal{$l});
  95. $intnode->bootstrap(sprintf("%d",100 * $lookup{$l} / $i));
  96. }
  97. }
  98. return $guide_tree;
  99. }
  100. =head2 cherries
  101. Example : cherries($tree, $node);
  102. Description: Count number of paired leaf nodes
  103. in a binary tree
  104. Returns : integer
  105. Exceptions :
  106. Args : 1. Bio::Tree::TreeI object
  107. 2. Bio::Tree::NodeI object within the tree, optional
  108. Commonly used statistics assume a binary tree, but this methods
  109. returns a value even for trees with polytomies.
  110. =cut
  111. sub cherries ($;$) {
  112. my $self = shift;
  113. my $tree = shift;
  114. my $node = shift || $tree->get_root_node;
  115. my $cherries = 0;
  116. my @descs = $node->each_Descendent;
  117. if ($descs[0]->is_Leaf and $descs[1]->is_Leaf) {
  118. if ($descs[3]) { #polytomy at leaf level
  119. $cherries = 0;
  120. } else {
  121. $cherries = 1;
  122. }
  123. } else {
  124. # recurse
  125. foreach my $desc (@descs) {
  126. $cherries += $self->cherries($tree, $desc);
  127. }
  128. }
  129. return $cherries;
  130. }
  131. =head2 Tree-Trait statistics
  132. The following methods produce desciptors of trait distribution among
  133. leaf nodes within the trees. They require that a trait has been set
  134. for each leaf node. The tag methods of Bio::Tree::Node are used to
  135. store them as key/value pairs. In this way, one tree can store more
  136. than one trait.
  137. Trees have method add_traits() to set trait values from a file.
  138. =head2 fitch
  139. Example : fitch($tree, $key, $node);
  140. Description: Calculates Parsimony Score (PS) and internal trait
  141. values using the Fitch 1971 parsimony algorithm for
  142. the subtree a defined by the (internal) node.
  143. Node defaults to the root.
  144. Returns : true on success
  145. Exceptions : leaf nodes have to have the trait defined
  146. Args : 1. Bio::Tree::TreeI object
  147. 2. trait name string
  148. 3. Bio::Tree::NodeI object within the tree, optional
  149. Runs first L<fitch_up> that calculats parsimony scores and then
  150. L<fitch_down> that should resolve most of the trait/character state
  151. ambiguities.
  152. Fitch, W.M., 1971. Toward defining the course of evolution: minimal
  153. change for a specific tree topology. Syst. Zool. 20, 406-416.
  154. You can access calculated parsimony values using:
  155. $score = $node->->get_tag_values('ps_score');
  156. and the trait value with:
  157. $traitvalue = $node->->get_tag_values('ps_trait'); # only the first
  158. @traitvalues = $node->->get_tag_values('ps_trait');
  159. Note that there can be more that one trait value, especially for the
  160. root node.
  161. =cut
  162. sub fitch {
  163. my $self = shift;
  164. my $tree = shift;
  165. my $key = shift || $self->throw("Trait name is needed");
  166. my $node = shift || $tree->get_root_node;
  167. $self->fitch_up($tree, $key, $node);
  168. $self->fitch_down($tree, $node);
  169. }
  170. =head2 ps
  171. Example : ps($tree, $key, $node);
  172. Description: Calculates Parsimony Score (PS) from Fitch 1971
  173. parsimony algorithm for the subtree a defined
  174. by the (internal) node.
  175. Node defaults to the root.
  176. Returns : integer, 1< PS < n, where n is number of branches
  177. Exceptions : leaf nodes have to have the trait defined
  178. Args : 1. Bio::Tree::TreeI object
  179. 2. trait name string
  180. 3. Bio::Tree::NodeI object within the tree, optional
  181. This is the first half of the Fitch algorithm that is enough for
  182. calculating the resolved parsimony values. The trait/chararacter
  183. states are commonly left in ambiguos state. To resolve them, run
  184. L<fitch_down>.
  185. =cut
  186. sub ps { shift->fitch_up(@_) }
  187. =head2 fitch_up
  188. Example : fitch_up($tree, $key, $node);
  189. Description: Calculates Parsimony Score (PS) from the Fitch 1971
  190. parsimony algorithm for the subtree a defined
  191. by the (internal) node.
  192. Node defaults to the root.
  193. Returns : integer, 1< PS < n, where n is number of branches
  194. Exceptions : leaf nodes have to have the trait defined
  195. Args : 1. Bio::Tree::TreeI object
  196. 2. trait name string
  197. 3. Bio::Tree::NodeI object within the tree, optional
  198. This is a more generic name for L<ps> and indicates that it performs
  199. the first bottom-up tree traversal that calculates the parsimony score
  200. but usually leaves trait/character states ambiguous. If you are
  201. interested in internal trait states, running L<fitch_down> should
  202. resolve most of the ambiguities.
  203. =cut
  204. sub fitch_up {
  205. my $self = shift;
  206. my $tree = shift;
  207. my $key = shift || $self->throw("Trait name is needed");
  208. my $node = shift || $tree->get_root_node;
  209. if ($node->is_Leaf) {
  210. $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
  211. unless $node->has_tag($key);
  212. $node->set_tag_value('ps_trait', $node->get_tag_values($key) );
  213. $node->set_tag_value('ps_score', 0 );
  214. return; # end of recursion
  215. }
  216. foreach my $child ($node->each_Descendent) {
  217. $self->fitch_up($tree, $key, $child);
  218. }
  219. my %intersection;
  220. my %union;
  221. my $score;
  222. foreach my $child ($node->each_Descendent) {
  223. foreach my $trait ($child->get_tag_values('ps_trait') ) {
  224. $intersection{$trait}++ if $union{$trait};
  225. $union{$trait}++;
  226. }
  227. $score += $child->get_tag_values('ps_score');
  228. }
  229. if (keys %intersection) {
  230. $node->set_tag_value('ps_trait', keys %intersection);
  231. $node->set_tag_value('ps_score', $score);
  232. } else {
  233. $node->set_tag_value('ps_trait', keys %union);
  234. $node->set_tag_value('ps_score', $score+1);
  235. }
  236. if ($self->verbose) {
  237. print "-- node --------------------------\n";
  238. print "iID: ", $node->internal_id, " (", $node->id, ")\n";
  239. print "Trait: ", join (', ', $node->get_tag_values('ps_trait') ), "\n";
  240. print "length :", scalar($node->get_tag_values('ps_score')) , "\n";
  241. }
  242. return scalar $node->get_tag_values('ps_score');
  243. }
  244. =head2 fitch_down
  245. Example : fitch_down($tree, $node);
  246. Description: Runs the second pass from Fitch 1971
  247. parsimony algorithm to resolve ambiguous
  248. trait states left by first pass.
  249. by the (internal) node.
  250. Node defaults to the root.
  251. Returns : true
  252. Exceptions : dies unless the trait is defined in all nodes
  253. Args : 1. Bio::Tree::TreeI object
  254. 2. Bio::Tree::NodeI object within the tree, optional
  255. Before running this method you should have ran L<fitch_up> (alias to
  256. L<ps> ). Note that it is not guarantied that all states are completely
  257. resolved.
  258. =cut
  259. sub fitch_down {
  260. my $self = shift;
  261. my $tree = shift;
  262. my $node = shift || $tree->get_root_node;
  263. my $key = 'ps_trait';
  264. $self->throw ("ERROR: ". $node->internal_id. " needs a value for $key")
  265. unless $node->has_tag($key);
  266. my $nodev;
  267. foreach my $trait ($node->get_tag_values($key) ) {
  268. $nodev->{$trait}++;
  269. }
  270. foreach my $child ($node->each_Descendent) {
  271. next if $child->is_Leaf; # end of recursion
  272. my $intersection;
  273. foreach my $trait ($child->get_tag_values($key) ) {
  274. $intersection->{$trait}++ if $nodev->{$trait};
  275. }
  276. $self->fitch_down($tree, $child);
  277. $child->set_tag_value($key, keys %$intersection);
  278. }
  279. return 1; # success
  280. }
  281. =head2 persistence
  282. Example : persistence($tree, $node);
  283. Description: Calculates the persistence
  284. for node in the subtree defined by the (internal)
  285. node. Node defaults to the root.
  286. Returns : int, number of generations trait value has to remain same
  287. Exceptions : all the nodes need to have the trait defined
  288. Args : 1. Bio::Tree::TreeI object
  289. 2. Bio::Tree::NodeI object within the tree, optional
  290. Persistence is a measure of the stability the trait value has in a
  291. tree. It expresses the number of generations the trait value remains
  292. the same. All the decendants of the root in the same generation have
  293. to share the same value.
  294. Depends on Fitch's parsimony score (PS).
  295. =cut
  296. sub _persistence {
  297. my $self = shift;
  298. my $tree = shift;
  299. my $node = shift;
  300. my $value = shift || $self->throw("Value is needed");
  301. my $key = 'ps_trait';
  302. $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
  303. return 0 unless $node->get_tag_values($key) eq $value; # wrong value
  304. return 1 if $node->is_Leaf; # end of recursion
  305. my $persistence = 10000000; # an arbitrarily large number
  306. foreach my $child ($node->each_Descendent) {
  307. my $pers = $self->_persistence($tree, $child, $value);
  308. $persistence = $pers if $pers < $persistence;
  309. }
  310. return $persistence + 1;
  311. }
  312. sub persistence {
  313. my $self = shift;
  314. my $tree = shift;
  315. my $node = shift || $tree->get_root_node;
  316. $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
  317. my $key = 'ps_trait';
  318. my $value = $node->get_tag_values($key);
  319. #calculate
  320. my $persistence = $self->_persistence($tree, $node, $value);
  321. $node->set_tag_value('persistance', $persistence);
  322. return $persistence;
  323. }
  324. =head2 count_subclusters
  325. Example : count_clusters($tree, $node);
  326. Description: Calculates the number of sub-clusters
  327. in the subtree defined by the (internal)
  328. node. Node defaults to the root.
  329. Returns : int, count
  330. Exceptions : all the nodes need to have the trait defined
  331. Args : 1. Bio::Tree::TreeI object
  332. 2. Bio::Tree::NodeI object within the tree, optional
  333. Depends on Fitch's parsimony score (PS).
  334. =cut
  335. sub _count_subclusters {
  336. my $self = shift;
  337. my $tree = shift;
  338. my $node = shift;
  339. my $value = shift || $self->throw("Value is needed");
  340. my $key = 'ps_trait';
  341. $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
  342. unless $node->has_tag($key);
  343. if ($node->get_tag_values($key) eq $value) {
  344. if ($node->get_tag_values('ps_score') == 0) {
  345. return 0;
  346. } else {
  347. my $count = 0;
  348. foreach my $child ($node->each_Descendent) {
  349. $count += $self->_count_subclusters($tree, $child, $value);
  350. }
  351. return $count;
  352. }
  353. }
  354. return 1;
  355. }
  356. sub count_subclusters {
  357. my $self = shift;
  358. my $tree = shift;
  359. my $node = shift || $tree->get_root_node;
  360. $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
  361. my $key = 'ps_trait';
  362. my $value = $node->get_tag_values($key);
  363. return $self->_count_subclusters($tree, $node, $value);
  364. }
  365. =head2 count_leaves
  366. Example : count_leaves($tree, $node);
  367. Description: Calculates the number of leaves with same trait
  368. value as root in the subtree defined by the (internal)
  369. node. Requires an unbroken line of identical trait values.
  370. Node defaults to the root.
  371. Returns : int, number of leaves with this trait value
  372. Exceptions : all the nodes need to have the trait defined
  373. Args : 1. Bio::Tree::TreeI object
  374. 2. Bio::Tree::NodeI object within the tree, optional
  375. Depends on Fitch's parsimony score (PS).
  376. =cut
  377. sub _count_leaves {
  378. my $self = shift;
  379. my $tree = shift;
  380. my $node = shift || $tree->get_root_node;
  381. my $value = shift;
  382. my $key = 'ps_trait';
  383. $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
  384. unless $node->has_tag($key);
  385. if ($node->get_tag_values($key) eq $value) {
  386. #print $node->id, ": ", $node->get_tag_values($key), "\n";
  387. return 1 if $node->is_Leaf; # end of recursion
  388. my $count = 0;
  389. foreach my $child ($node->each_Descendent) {
  390. $count += $self->_count_leaves($tree, $child, $value);
  391. }
  392. return $count;
  393. }
  394. return 0;
  395. }
  396. sub count_leaves {
  397. my $self = shift;
  398. my $tree = shift;
  399. my $node = shift || $tree->get_root_node;
  400. $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
  401. my $key = 'ps_trait';
  402. my $value = $node->get_tag_values($key);
  403. return $self->_count_leaves($tree, $node, $value);
  404. }
  405. =head2 phylotype_length
  406. Example : phylotype_length($tree, $node);
  407. Description: Sums up the branch lengths within phylotype
  408. exluding the subclusters where the trait values
  409. are different
  410. Returns : float, length
  411. Exceptions : all the nodes need to have the trait defined
  412. Args : 1. Bio::Tree::TreeI object
  413. 2. Bio::Tree::NodeI object within the tree, optional
  414. Depends on Fitch's parsimony score (PS).
  415. =cut
  416. sub _phylotype_length {
  417. my $self = shift;
  418. my $tree = shift;
  419. my $node = shift;
  420. my $value = shift;
  421. my $key = 'ps_trait';
  422. $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
  423. unless $node->has_tag($key);
  424. return 0 if $node->get_tag_values($key) ne $value;
  425. return $node->branch_length if $node->is_Leaf; # end of recursion
  426. my $length = 0;
  427. foreach my $child ($node->each_Descendent) {
  428. my $sub_len = $self->_phylotype_length($tree, $child, $value);
  429. $length += $sub_len;
  430. $length += $child->branch_length if not $child->is_Leaf and $sub_len;
  431. }
  432. return $length;
  433. }
  434. sub phylotype_length {
  435. my $self = shift;
  436. my $tree = shift;
  437. my $node = shift || $tree->get_root_node;
  438. my $key = 'ps_trait';
  439. my $value = $node->get_tag_values($key);
  440. return $self->_phylotype_length($tree, $node, $value);
  441. }
  442. =head2 sum_of_leaf_distances
  443. Example : sum_of_leaf_distances($tree, $node);
  444. Description: Sums up the branch lengths from root to leaf
  445. exluding the subclusters where the trait values
  446. are different
  447. Returns : float, length
  448. Exceptions : all the nodes need to have the trait defined
  449. Args : 1. Bio::Tree::TreeI object
  450. 2. Bio::Tree::NodeI object within the tree, optional
  451. Depends on Fitch's parsimony score (PS).
  452. =cut
  453. sub _sum_of_leaf_distances {
  454. my $self = shift;
  455. my $tree = shift;
  456. my $node = shift;
  457. my $value = shift;
  458. my $key = 'ps_trait';
  459. $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
  460. unless $node->has_tag($key);
  461. return 0 if $node->get_tag_values($key) ne $value;
  462. #return $node->branch_length if $node->is_Leaf; # end of recursion
  463. return 0 if $node->is_Leaf; # end of recursion
  464. my $length = 0;
  465. foreach my $child ($node->each_Descendent) {
  466. $length += $self->_count_leaves($tree, $child, $value) * $child->branch_length +
  467. $self->_sum_of_leaf_distances($tree, $child, $value);
  468. }
  469. return $length;
  470. }
  471. sub sum_of_leaf_distances {
  472. my $self = shift;
  473. my $tree = shift;
  474. my $node = shift || $tree->get_root_node;
  475. my $key = 'ps_trait';
  476. my $value = $node->get_tag_values($key);
  477. return $self->_sum_of_leaf_distances($tree, $node, $value);
  478. }
  479. =head2 genetic_diversity
  480. Example : genetic_diversity($tree, $node);
  481. Description: Diversity is the sum of root to leaf distances
  482. within the phylotype normalised by number of leaf
  483. nodes
  484. Returns : float, value of genetic diversity
  485. Exceptions : all the nodes need to have the trait defined
  486. Args : 1. Bio::Tree::TreeI object
  487. 2. Bio::Tree::NodeI object within the tree, optional
  488. Depends on Fitch's parsimony score (PS).
  489. =cut
  490. sub genetic_diversity {
  491. my $self = shift;
  492. my $tree = shift;
  493. my $node = shift || $tree->get_root_node;
  494. return $self->sum_of_leaf_distances($tree, $node) /
  495. $self->count_leaves($tree, $node);
  496. }
  497. =head2 statratio
  498. Example : statratio($tree, $node);
  499. Description: Ratio of the stem length and the genetic diversity of the
  500. phylotype L<genetic_diversity>
  501. Returns : float, separation score
  502. Exceptions : all the nodes need to have the trait defined
  503. Args : 1. Bio::Tree::TreeI object
  504. 2. Bio::Tree::NodeI object within the tree, optional
  505. TStatratio gives a measure of separation and variability within the phylotype.
  506. Larger values identify more rapidly evolving and recent phylotypes.
  507. Depends on Fitch's parsimony score (PS).
  508. =cut
  509. sub statratio {
  510. my $self = shift;
  511. my $tree = shift;
  512. my $node = shift || $tree->get_root_node;
  513. my $div = $self->genetic_diversity($tree, $node);
  514. return 0 if $div == 0;
  515. return $node->branch_length / $div;
  516. }
  517. =head2 ai
  518. Example : ai($tree, $key, $node);
  519. Description: Calculates the Association Index (AI) of Whang et
  520. al. 2001 for the subtree defined by the (internal)
  521. node. Node defaults to the root.
  522. Returns : real
  523. Exceptions : leaf nodes have to have the trait defined
  524. Args : 1. Bio::Tree::TreeI object
  525. 2. trait name string
  526. 3. Bio::Tree::NodeI object within the tree, optional
  527. Association index (AI) gives a more fine grained results than PS since
  528. the result is a real number. ~0 E<lt>= AI.
  529. Wang, T.H., Donaldson, Y.K., Brettle, R.P., Bell, J.E., Simmonds, P.,
  530. 2001. Identification of shared populations of human immunodeficiency
  531. Virus Type 1 infecting microglia and tissue macrophages outside the
  532. central nervous system. J. Virol. 75 (23), 11686-11699.
  533. =cut
  534. sub _node_ai {
  535. my $self = shift;
  536. my $node = shift;
  537. my $key = shift;
  538. my $traits;
  539. my $leaf_count = 0;
  540. for my $desc ( $node->get_all_Descendents ) {
  541. next unless $desc->is_Leaf;
  542. $leaf_count++;
  543. $self->throw ("Node ". $desc->id. " needs a value for trait [$key]")
  544. unless $desc->has_tag($key);
  545. my $trait = $desc->get_tag_values($key);
  546. $traits->{$trait}++;
  547. }
  548. my $most_common = 0;
  549. foreach ( keys %$traits) {
  550. $most_common = $traits->{$_} if $traits->{$_} > $most_common;
  551. }
  552. return sprintf "%1.6f", (1 - ($most_common/$leaf_count) ) / (2**($leaf_count-1) );
  553. }
  554. sub ai {
  555. my $self = shift;
  556. my $tree = shift;
  557. my $key = shift || $self->throw("Trait name is needed");
  558. my $start_node = shift || $tree->get_root_node;
  559. return unless $start_node;
  560. my $sum = 0;
  561. for my $node ( $start_node->get_all_Descendents ) {
  562. next if $node->is_Leaf;
  563. $sum += $self->_node_ai($node, $key);
  564. }
  565. return $sum;
  566. }
  567. =head2 mc
  568. Example : mc($tree, $key, $node);
  569. Description: Calculates the Monophyletic Clade (MC) size statistics
  570. for the subtree a defined by the (internal) node.
  571. Node defaults to the root;
  572. Returns : hashref with trait values as keys
  573. Exceptions : leaf nodes have to have the trait defined
  574. Args : 1. Bio::Tree::TreeI object
  575. 2. trait name string
  576. 3. Bio::Tree::NodeI object within the tree, optional
  577. Monophyletic Clade (MC) size statistics by Salemi at al 2005. It is
  578. calculated for each trait value. 1 E<lt>= MC E<lt>= nx, where nx is the
  579. number of tips with value x:
  580. pick the internal node with maximim value for
  581. number of of tips with only trait x
  582. MC was defined by Parker et al 2008.
  583. Salemi, M., Lamers, S.L., Yu, S., de Oliveira, T., Fitch, W.M., McGrath, M.S.,
  584. 2005. Phylodynamic analysis of Human Immunodeficiency Virus Type 1 in
  585. distinct brain compartments provides a model for the neuropathogenesis of
  586. AIDS. J. Virol. 79 (17), 11343-11352.
  587. Parker, J., Rambaut A., Pybus O., 2008. Correlating viral phenotypes
  588. with phylogeny: Accounting for phylogenetic uncertainty Infection,
  589. Genetics and Evolution 8 (2008), 239-246.
  590. =cut
  591. sub _node_mc {
  592. my $self = shift;
  593. my $node = shift;
  594. my $key = shift;
  595. my $traits;
  596. my $leaf_count = 0;
  597. for my $node2 ( $node->get_all_Descendents ) {
  598. next unless $node2->is_Leaf;
  599. $leaf_count++;
  600. my $trait = $node2->get_tag_values($key);
  601. $traits->{$trait}++;
  602. }
  603. return $traits;
  604. }
  605. sub mc {
  606. my $self = shift;
  607. my $tree = shift;
  608. my $key = shift || die "Trait name is needed";
  609. my $start_node = shift || $tree->get_root_node;
  610. return unless $start_node;
  611. my $sum; # hashref, keys are trait values
  612. my $keys; # hashref, keys are trait values
  613. foreach my $node ( $start_node->get_all_Descendents ) {
  614. next if $node->is_Leaf;
  615. my $traits = $self->_node_mc($node, $key);
  616. if (scalar keys %$traits == 1) {
  617. my ($value) = keys %$traits;
  618. no warnings;
  619. $sum->{$value} = $traits->{$value}
  620. if $sum->{$value} < $traits->{$value};
  621. } else {
  622. map { $keys->{$_} = 1 } keys %$traits;
  623. }
  624. }
  625. # check for cases where there are no clusters
  626. foreach my $value (keys %$keys) {
  627. $sum->{$value} = 1 unless defined $sum->{$value};
  628. }
  629. return $sum;
  630. }
  631. 1;