PageRenderTime 138ms CodeModel.GetById 44ms RepoModel.GetById 10ms app.codeStats 0ms

/Data-Hierarchy-0.34/Hierarchy.pm

#
Perl | 583 lines | 397 code | 113 blank | 73 comment | 50 complexity | 499d79935ebc35494bf2ae8b8f11ff1c MD5 | raw file
  1. package Data::Hierarchy;
  2. $VERSION = '0.34';
  3. use strict;
  4. use Storable qw(dclone);
  5. # XXX consider using Moose
  6. =head1 NAME
  7. Data::Hierarchy - Handle data in a hierarchical structure
  8. =head1 SYNOPSIS
  9. my $tree = Data::Hierarchy->new();
  10. $tree->store ('/', {access => 'all'});
  11. $tree->store ('/private', {access => 'auth',
  12. '.note' => 'this is private});
  13. $info = $tree->get ('/private/somewhere/deep');
  14. # return actual data points in list context
  15. ($info, @fromwhere) = $tree->get ('/private/somewhere/deep');
  16. my @items = $tree->find ('/', {access => qr/.*/});
  17. # override all children
  18. $tree->store ('/', {'.note' => undef}, {override_sticky_descendents => 1});
  19. =head1 DESCRIPTION
  20. L<Data::Hierarchy> provides a simple interface for manipulating
  21. inheritable data attached to a hierarchical environment (like
  22. a filesystem).
  23. One use of L<Data::Hierarchy> is to allow an application to annotate
  24. paths in a real filesystem in a single compact data
  25. structure. However, the hierarchy does not actually need to correspond
  26. to an actual filesystem.
  27. Paths in a hierarchy are referred to in a Unix-like syntax; C<"/"> is
  28. the root "directory". (You can specify a different separator character
  29. than the slash when you construct a Data::Hierarchy object.) With the
  30. exception of the root path, paths should never contain trailing
  31. slashes. You can associate properties, which are arbitrary name/value
  32. pairs, with any path. (Properties cannot contain the undefined value.)
  33. By default, properties are inherited by child
  34. paths: thus, if you store some data at C</some/path>:
  35. $tree->store('/some/path', {color => 'red'});
  36. you can fetch it again at a C</some/path/below/that>:
  37. print $tree->get('/some/path/below/that')->{'color'};
  38. # prints red
  39. On the other hand, properties whose names begin with dots are
  40. uninherited, or "sticky":
  41. $tree->store('/some/path', {'.color' => 'blue'});
  42. print $tree->get('/some/path')->{'.color'}; # prints blue
  43. print $tree->get('/some/path/below/that')->{'.color'}; # undefined
  44. Note that you do not need to (and in fact, cannot) explicitly add
  45. "files" or "directories" to the hierarchy; you simply add and delete
  46. properties to paths.
  47. =cut
  48. =head1 CONSTRUCTOR
  49. Creates a new hierarchy object. Takes the following options:
  50. =over
  51. =item sep
  52. The string used as a separator between path levels. Defaults to '/'.
  53. =back
  54. =cut
  55. sub new {
  56. my $class = shift;
  57. my %args = (
  58. sep => '/',
  59. @_);
  60. my $self = bless {}, $class;
  61. $self->{sep} = $args{sep};
  62. $self->{hash} = {};
  63. $self->{sticky} = {};
  64. return $self;
  65. }
  66. =head1 METHODS
  67. =head2 Instance Methods
  68. =over
  69. =cut
  70. =item C<store $path, $properties, {%options}>
  71. Given a path and a hash reference of properties, stores the properties
  72. at the path.
  73. Unless the C<override_descendents> option is given with a false value,
  74. it eliminates any non-sticky property in a descendent of C<$path> with
  75. the same name.
  76. If the C<override_sticky_descendents> option is given with a true
  77. value, it eliminates any sticky property in a descendent of C<$path>
  78. with the same name. override it.
  79. A value of undef removes that value; note, though, that
  80. if an ancestor of C<$path> defines that property, the ancestor's value
  81. will be inherited there; that is, with:
  82. $t->store('/a', {k => 'top'});
  83. $t->store('/a/b', {k => 'bottom'});
  84. $t->store('/a/b', {k => undef});
  85. print $t->get('/a/b')->{'k'};
  86. it will print 'top'.
  87. =cut
  88. sub store {
  89. my $self = shift;
  90. $self->_store_no_cleanup(@_);
  91. $self->_remove_redundant_properties_and_undefs($_[0]);
  92. }
  93. # Internal method.
  94. #
  95. # Does everything that store does, except for the cleanup at the
  96. # end (appropriate for use in e.g. merge, which calls this a bunch of
  97. # times and then does cleanup at the end).
  98. sub _store_no_cleanup {
  99. my $self = shift;
  100. my $path = shift;
  101. my $props = shift;
  102. my $opts = shift || {};
  103. $self->_path_safe ($path);
  104. my %args = (
  105. override_descendents => 1,
  106. override_sticky_descendents => 0,
  107. %$opts);
  108. $self->_remove_matching_properties_recursively($path, $props, $self->{hash})
  109. if $args{override_descendents};
  110. $self->_remove_matching_properties_recursively($path, $props, $self->{sticky})
  111. if $args{override_sticky_descendents};
  112. $self->_store ($path, $props);
  113. }
  114. =item C<get $path, [$dont_clone]>
  115. Given a path, looks up all of the properteies (sticky and not) and
  116. returns them in a hash reference. The values are clones, unless you
  117. pass a true value for C<$dont_clone>.
  118. If called in list context, returns that hash reference followed by all
  119. of the ancestral paths of C<$path> which contain non-sticky properties
  120. (possibly including itself).
  121. =cut
  122. sub get {
  123. my ($self, $path, $dont_clone) = @_;
  124. $self->_path_safe ($path);
  125. my $value = {};
  126. my @datapoints = $self->_ancestors($self->{hash}, $path);
  127. for (@datapoints) {
  128. my $newv = $self->{hash}{$_};
  129. $newv = dclone $newv unless $dont_clone;
  130. $value = {%$value, %$newv};
  131. }
  132. if (exists $self->{sticky}{$path}) {
  133. my $newv = $self->{sticky}{$path};
  134. $newv = dclone $newv unless $dont_clone;
  135. $value = {%$value, %$newv}
  136. }
  137. return wantarray ? ($value, @datapoints) : $value;
  138. }
  139. =item C<find $path, $property_regexps>
  140. Given a path and a hash reference of name/regular expression pairs,
  141. returns a list of all paths which are descendents of C<$path>
  142. (including itself) and define B<at that path itself> (not inherited)
  143. all of the properties in the hash with values matching the given
  144. regular expressions. (You may want to use C<qr/.*/> to merely see if
  145. it has any value defined there.) Properties can be sticky or not.
  146. =cut
  147. sub find {
  148. my ($self, $path, $prop_regexps) = @_;
  149. $self->_path_safe ($path);
  150. my @items;
  151. my @datapoints = $self->_all_descendents($path);
  152. for my $subpath (@datapoints) {
  153. my $matched = 1;
  154. for (keys %$prop_regexps) {
  155. my $lookat = (index($_, '.') == 0) ?
  156. $self->{sticky}{$subpath} : $self->{hash}{$subpath};
  157. $matched = 0
  158. unless exists $lookat->{$_}
  159. && $lookat->{$_} =~ m/$prop_regexps->{$_}/;
  160. last unless $matched;
  161. }
  162. push @items, $subpath
  163. if $matched;
  164. }
  165. return @items;
  166. }
  167. =item C<merge $other_hierarchy, $path>
  168. Given a second L<Data::Hierarchy> object and a path, copies all the
  169. properties from the other object at C<$path> or below into the
  170. corresponding paths in the object this method is invoked on. All
  171. properties from the object this is invoked on at C<$path> or below are
  172. erased first.
  173. =cut
  174. sub merge {
  175. my ($self, $other, $path) = @_;
  176. $self->_path_safe ($path);
  177. my %datapoints = map {$_ => 1} ($self->_all_descendents ($path),
  178. $other->_all_descendents ($path));
  179. for my $datapoint (sort keys %datapoints) {
  180. my $my_props = $self->get ($datapoint, 1);
  181. my $other_props = $other->get ($datapoint);
  182. for (keys %$my_props) {
  183. $other_props->{$_} = undef
  184. unless defined $other_props->{$_};
  185. }
  186. $self->_store_no_cleanup ($datapoint, $other_props);
  187. }
  188. $self->_remove_redundant_properties_and_undefs;
  189. }
  190. =item C<to_relative $base_path>
  191. Given a path which B<every> element of the hierarchy must be contained
  192. in, returns a special Data::Hierarchy::Relative object which
  193. represents the hierarchy relative that path. The B<only> thing you can
  194. do with a Data::Hierarchy::Relative object is call
  195. C<to_absolute($new_base_path)> on it, which returns a new
  196. L<Data::Hierarchy> object at that base path. For example, if
  197. everything in the hierarchy is rooted at C</home/super_project> and it
  198. needs to be moved to C</home/awesome_project>, you can do
  199. $hierarchy = $hierarchy->to_relative('/home/super_project')->to_absolute('/home/awesome_project');
  200. (Data::Hierarchy::Relative objects may be a more convenient
  201. serialization format than Data::Hierarchy objects, if they are
  202. tracking the state of some relocatable resource.)
  203. =cut
  204. sub to_relative {
  205. my $self = shift;
  206. my $base_path = shift;
  207. return Data::Hierarchy::Relative->new($base_path, %$self);
  208. }
  209. # Internal method.
  210. #
  211. # Dies if the given path has a trailing slash and is not the root. If it is root,
  212. # destructively changes the path given as argument to the empty string.
  213. sub _path_safe {
  214. # Have to do this explicitly on the elements of @_ in order to be destructive
  215. if ($_[1] eq $_[0]->{sep}) {
  216. $_[1] = '';
  217. return;
  218. }
  219. my $self = shift;
  220. my $path = shift;
  221. my $location_of_last_separator = rindex($path, $self->{sep});
  222. return if $location_of_last_separator == -1;
  223. my $potential_location_of_trailing_separator = (length $path) - (length $self->{sep});
  224. return unless $location_of_last_separator == $potential_location_of_trailing_separator;
  225. require Carp;
  226. Carp::confess('non-root path has a trailing slash!');
  227. }
  228. # Internal method.
  229. #
  230. # Actually does property updates (to hash or sticky, depending on name).
  231. sub _store {
  232. my ($self, $path, $new_props) = @_;
  233. my $old_props = exists $self->{hash}{$path} ? $self->{hash}{$path} : undef;
  234. my $merged_props = {%{$old_props||{}}, %$new_props};
  235. for (keys %$merged_props) {
  236. if (index($_, '.') == 0) {
  237. defined $merged_props->{$_} ?
  238. $self->{sticky}{$path}{$_} = $merged_props->{$_} :
  239. delete $self->{sticky}{$path}{$_};
  240. delete $merged_props->{$_};
  241. }
  242. else {
  243. delete $merged_props->{$_}
  244. unless defined $merged_props->{$_};
  245. }
  246. }
  247. $self->{hash}{$path} = $merged_props;
  248. }
  249. # Internal method.
  250. #
  251. # Given a hash (probably $self->{hash}, $self->{sticky}, or their union),
  252. # returns a sorted list of the paths with data that are ancestors of the given
  253. # path (including it itself).
  254. sub _ancestors {
  255. my ($self, $hash, $path) = @_;
  256. my @ancestors;
  257. push @ancestors, '' if exists $hash->{''};
  258. # Special case the root.
  259. return @ancestors if $path eq '';
  260. my @parts = split m{\Q$self->{sep}}, $path;
  261. # Remove empty string at the front.
  262. my $current = '';
  263. unless (length $parts[0]) {
  264. shift @parts;
  265. $current .= $self->{sep};
  266. }
  267. for my $part (@parts) {
  268. $current .= $part;
  269. push @ancestors, $current if exists $hash->{$current};
  270. $current .= $self->{sep};
  271. }
  272. # XXX: could build cached pointer for fast traversal
  273. return @ancestors;
  274. }
  275. # Internal method.
  276. #
  277. # Given a hash (probably $self->{hash}, $self->{sticky}, or their union),
  278. # returns a sorted list of the paths with data that are descendents of the given
  279. # path (including it itself).
  280. sub _descendents {
  281. my ($self, $hash, $path) = @_;
  282. # If finding for everything, don't bother grepping
  283. return sort keys %$hash unless length($path);
  284. return sort grep {index($_.$self->{sep}, $path.$self->{sep}) == 0}
  285. keys %$hash;
  286. }
  287. # Internal method.
  288. #
  289. # Returns a sorted list of all of the paths which currently have any
  290. # properties (sticky or not) that are descendents of the given path
  291. # (including it itself).
  292. #
  293. # (Note that an arg of "/f" can return entries "/f" and "/f/g" but not
  294. # "/foo".)
  295. sub _all_descendents {
  296. my ($self, $path) = @_;
  297. $self->_path_safe ($path);
  298. my $both = {%{$self->{hash}}, %{$self->{sticky} || {}}};
  299. return $self->_descendents($both, $path);
  300. }
  301. # Internal method.
  302. #
  303. # Given a path, a hash reference of properties, and a hash reference
  304. # (presumably {hash} or {sticky}), removes all properties from the
  305. # hash at the path or its descendents with the same name as a name in
  306. # the given property hash. (The values in the property hash are
  307. # ignored.)
  308. sub _remove_matching_properties_recursively {
  309. my ($self, $path, $remove_props, $hash) = @_;
  310. my @datapoints = $self->_descendents ($hash, $path);
  311. for my $datapoint (@datapoints) {
  312. delete $hash->{$datapoint}{$_} for keys %$remove_props;
  313. delete $hash->{$datapoint} unless %{$hash->{$datapoint}};
  314. }
  315. }
  316. # Internal method.
  317. #
  318. # Returns the parent of a path; this is a purely textual operation, and is not necessarily a datapoint.
  319. # Do not pass in the root.
  320. sub _parent {
  321. my $self = shift;
  322. my $path = shift;
  323. return if $path eq q{} or $path eq $self->{sep};
  324. # For example, say $path is "/foo/bar/baz";
  325. # then $last_separator is 8.
  326. my $last_separator = rindex($path, $self->{sep});
  327. # This happens if a path is passed in without a leading
  328. # slash. This is really a bug, but old version of
  329. # SVK::Editor::Status did this, and we might as well make it not
  330. # throw unintialized value errors, since it works otherwise. At
  331. # some point in the future this should be changed to a plain
  332. # "return" or an exception.
  333. return '' if $last_separator == -1;
  334. return substr($path, 0, $last_separator);
  335. }
  336. # Internal method.
  337. #
  338. # Cleans up the hash and sticky by removing redundant properties,
  339. # undef properties, and empty property hashes.
  340. sub _remove_redundant_properties_and_undefs {
  341. my $self = shift;
  342. my $prefix = shift;
  343. # This is not necessarily the most efficient way to implement this
  344. # cleanup, but that can be fixed later.
  345. # By sorting the keys, we guarantee that we never get to a path
  346. # before we've dealt with all of its ancestors.
  347. for my $path (sort keys %{$self->{hash}}) {
  348. next if $prefix && index($prefix.$self->{sep}, $path.$self->{sep}) != 0;
  349. my $props = $self->{hash}{$path};
  350. # First check for undefs.
  351. for my $name (keys %$props) {
  352. if (not defined $props->{$name}) {
  353. delete $props->{$name};
  354. }
  355. }
  356. # Now check for redundancy.
  357. # The root can't be redundant.
  358. if (length $path) {
  359. my $parent = $self->_parent($path);
  360. my $parent_props = $self->get($parent, 1);
  361. for my $name (keys %$props) {
  362. # We've already dealt with undefs in $props, so we
  363. # don't need to check that for defined.
  364. if (defined $parent_props->{$name} and
  365. $props->{$name} eq $parent_props->{$name}) {
  366. delete $props->{$name};
  367. }
  368. }
  369. }
  370. # Clean up empty property hashes.
  371. delete $self->{hash}{$path} unless %{ $self->{hash}{$path} };
  372. }
  373. for my $path (sort keys %{$self->{sticky}}) {
  374. # We only have to remove undefs from sticky, since there is no
  375. # inheritance.
  376. my $props = $self->{sticky}{$path};
  377. for my $name (keys %$props) {
  378. if (not defined $props->{$name}) {
  379. delete $props->{$name};
  380. }
  381. }
  382. # Clean up empty property hashes.
  383. delete $self->{sticky}{$path} unless %{ $self->{sticky}{$path} };
  384. }
  385. }
  386. # These are for backwards compatibility only.
  387. sub store_recursively { my $self = shift; $self->store(@_, {override_sticky_descendents => 1}); }
  388. sub store_fast { my $self = shift; $self->store(@_, {override_descendents => 0}); }
  389. sub store_override { my $self = shift; $self->store(@_, {override_descendents => 0}); }
  390. package Data::Hierarchy::Relative;
  391. sub new {
  392. my $class = shift;
  393. my $base_path = shift;
  394. my %args = @_;
  395. my $self = bless { sep => $args{sep} }, $class;
  396. my $base_length = length $base_path;
  397. for my $item (qw/hash sticky/) {
  398. my $original = $args{$item};
  399. my $result = {};
  400. for my $path (sort keys %$original) {
  401. unless ($path eq $base_path or index($path, $base_path . $self->{sep}) == 0) {
  402. require Carp;
  403. Carp::confess("$path is not a child of $base_path");
  404. }
  405. my $relative_path = substr($path, $base_length);
  406. $result->{$relative_path} = $original->{$path};
  407. }
  408. $self->{$item} = $result;
  409. }
  410. return $self;
  411. }
  412. sub to_absolute {
  413. my $self = shift;
  414. my $base_path = shift;
  415. my $tree = { sep => $self->{sep} };
  416. for my $item (qw/hash sticky/) {
  417. my $original = $self->{$item};
  418. my $result = {};
  419. for my $path (keys %$original) {
  420. $result->{$base_path . $path} = $original->{$path};
  421. }
  422. $tree->{$item} = $result;
  423. }
  424. bless $tree, 'Data::Hierarchy';
  425. return $tree;
  426. }
  427. 1;
  428. =back
  429. =head1 AUTHORS
  430. Chia-liang Kao E<lt>clkao@clkao.orgE<gt>
  431. David Glasser E<lt>glasser@mit.eduE<gt>
  432. =head1 COPYRIGHT
  433. Copyright 2003-2006 by Chia-liang Kao E<lt>clkao@clkao.orgE<gt>.
  434. This program is free software; you can redistribute it and/or modify it
  435. under the same terms as Perl itself.
  436. See L<http://www.perl.com/perl/misc/Artistic.html>
  437. =cut