/SOFT/lib/SOFT.pm

https://github.com/sorokine/SOFT · Perl · 1497 lines · 976 code · 258 blank · 263 comment · 178 complexity · 2f65f20a991eb01752e71f4258624c96 MD5 · raw file

  1. package SOFT;
  2. use warnings;
  3. use strict;
  4. use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
  5. require Exporter;
  6. @ISA = qw(Exporter);
  7. # Items to export into callers namespace by default. Note: do not export
  8. # names by default without a very good reason. Use EXPORT_OK instead.
  9. # Do not simply export all your public functions/methods/constants.
  10. @EXPORT = qw(
  11. );
  12. =head1 NAME
  13. SOFT - Perl module for processing files in Simple Ontology FormaT
  14. =head1 VERSION
  15. Version 0.5
  16. =cut
  17. our $VERSION = '0.5';
  18. use FileHandle;
  19. use File::Basename;
  20. use Carp;
  21. use Data::Dumper;
  22. use Text::Wrap;
  23. use HTML::Entities;
  24. use Convert::Color;
  25. use Convert::Color::HSV;
  26. use Text::CSV;
  27. use Tie::RegexpHash;
  28. # argument is a reference to the hash of option
  29. sub new {
  30. my $package = shift;
  31. my $self = {};
  32. # store options
  33. $self->{'opts'} = shift || {};
  34. $self->{'opts'}->{'verbose'} = 1 unless exists $self->{'opts'}->{'verbose'};
  35. # create class variables
  36. # each rel, ent, section hash has an entry 'src' for an array of source files and 'src_line' for an array of line numbers
  37. $self->{'rel'} = []; # list of relations
  38. # each relation is a hash with the following keys:
  39. # id - relation id
  40. # type - always 'rel'
  41. # from - entity id for from entity
  42. # to - to entity id for to entity
  43. # style - style object (optional)
  44. $self->{'ent'} = {}; # list of entities with their properties as subhashes, entity id is the key for this hash
  45. # each entity is a hash with the following keys:
  46. # id - entity id
  47. # type - entity type (cat|inst)
  48. # count - entity count
  49. # section - home section
  50. # style - style object (optional)
  51. $self->{'sections'} = {}; # list of sections
  52. # each entity is a hash with the following keys:
  53. # id - sections id (full section name delimited with | for each section level)
  54. # count - how many time the section has been parsed out from the file
  55. # type - always 'sec'
  56. # depth - section depth
  57. # members - a hash which keys are entity ids that belong to the section
  58. $self->{'includes'} = []; # stack of included soft files
  59. ### setting styles
  60. # styles entry in the SOFT object contains indices on style objects
  61. $self->{'styles'} = {
  62. 'by_id' => {}, # index by style id (all style must be indexed here)
  63. 'count' => 0 # total count of existing styles
  64. };
  65. # other style indexes has to be created by calling build_style_indices method after loading all style
  66. # style indices are stored in attributes named like cat_regex for assigning to categories by regex, cat_sect, inst_regex, etc.
  67. # each style object is a hash with the following entries:
  68. # id => '' style id, in the form like cat:id
  69. # ptrn => '' matching pattern
  70. # seq => '' sequence number in which styles has been loaded (used for prioritizing)
  71. # attrs => {} a hash of style attributes
  72. # src => '' source of the style (typically file name from which style has been loaded)
  73. # src_line => '' line in the source file
  74. # parent => '' parent style id from which current style has been extended
  75. my %style = ();
  76. # default style for relations
  77. %style = (
  78. 'id' => 'rel:',
  79. 'seq' => 0,
  80. 'attrs' => {
  81. 'style' => 'dashed',
  82. 'label' => '@ID@'
  83. },
  84. 'src' => __FILE__,
  85. 'src_line' => __LINE__
  86. );
  87. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  88. # default style for subcategory relation
  89. %style = (
  90. 'id' => 'rel:subcat',
  91. 'seq' => 1,
  92. 'attrs' => {
  93. 'style' => 'solid',
  94. 'arrowhead' => 'empty'
  95. },
  96. 'src' => __FILE__,
  97. 'src_line' => __LINE__
  98. );
  99. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  100. # default style for instantiation relation
  101. %style = (
  102. 'id' => 'rel:inst',
  103. 'seq' => 2,
  104. 'attrs' => {
  105. 'style' => 'solid',
  106. 'penwidth' => 2.0,
  107. 'weight' => 5.0,
  108. 'arrowhead' => 'dot'
  109. },
  110. 'src' => __FILE__,
  111. 'src_line' => __LINE__
  112. );
  113. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  114. # default style for categories
  115. %style = (
  116. 'id' => 'cat:',
  117. 'seq' => 3,
  118. 'attrs' => {
  119. 'shape' => 'box',
  120. 'label' => '@ID_STRING@',
  121. 'weight' => 5.0,
  122. '~shape' => 'record',
  123. '~label' => '{@ID_STRING@}|{%@PNAME@=@PVAL@%|%}'
  124. },
  125. 'src' => __FILE__,
  126. 'src_line' => __LINE__
  127. );
  128. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  129. # default style for instances
  130. %style = (
  131. 'id' => 'inst:',
  132. 'seq' => 4,
  133. 'attrs' => {
  134. 'shape' => 'box3d',
  135. 'penwidth' => 2.0,
  136. 'label' => '@ID_STRING@',
  137. '~style' => 'rounded',
  138. '~shape' => 'record',
  139. '~label' => '{@ID_STRING@}|{%@PNAME@=@PVAL@%|%}'
  140. },
  141. 'src' => __FILE__,
  142. 'src_line' => __LINE__
  143. );
  144. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  145. # default styles for sections
  146. %style = (
  147. 'id' => 'sec:', 'seq' => 5,, 'depth' => 0,
  148. 'attrs' => { 'label' => '@SECTION@' },
  149. 'src' => __FILE__, 'src_line' => __LINE__
  150. );
  151. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  152. %style = (
  153. 'id' => 'sec:1:', 'seq' => 6, 'depth' => 1,
  154. 'attrs' => { 'label' => '@SECTION@', 'color' => 'gray10', 'fontcolor' => 'gray10', 'labelloc' => 'b' },
  155. 'src' => __FILE__, 'src_line' => __LINE__
  156. );
  157. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  158. %style = (
  159. 'id' => 'sec:2:', 'seq' => 7, 'depth' => 2,
  160. 'attrs' => { 'label' => '@SECTION@', 'color' => 'gray20', 'fontcolor' => 'gray20', 'labelloc' => 'b' },
  161. 'src' => __FILE__, 'src_line' => __LINE__
  162. );
  163. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  164. %style = (
  165. 'id' => 'sec:3:', 'seq' => 8, 'depth' => 3,
  166. 'attrs' => { 'label' => '@SECTION@', 'color' => '#8547FF', 'fontcolor' => '#8547FF', 'labelloc' => 'b' },
  167. 'src' => __FILE__, 'src_line' => __LINE__
  168. );
  169. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  170. %style = (
  171. 'id' => 'sec:4:', 'seq' => 9, 'depth' => 4,
  172. 'attrs' => { 'label' => '@SECTION@', 'color' => 'gray40', 'fontcolor' => 'gray40', 'labelloc' => 'b' },
  173. 'src' => __FILE__, 'src_line' => __LINE__
  174. );
  175. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  176. %style = (
  177. 'id' => 'sec:5:', 'seq' => 10, 'depth' => 5,
  178. 'attrs' => { 'label' => '@SECTION@', 'color' => 'darkgreen', 'fontcolor' => 'darkgreen', 'labelloc' => 'b' },
  179. 'src' => __FILE__, 'src_line' => __LINE__
  180. );
  181. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  182. %style = (
  183. 'id' => 'sec:6:', 'seq' => 11, 'depth' => 6,
  184. 'attrs' => { 'label' => '@SECTION@', 'color' => 'red', 'fontcolor' => 'red', 'labelloc' => 'b' },
  185. 'src' => __FILE__, 'src_line' => __LINE__
  186. );
  187. $self->{'styles'}->{'by_id'}->{$style{'id'}} = { %style };
  188. $self->{'styles'}->{'count'} = scalar keys %{$self->{'styles'}->{'by_id'}}; # update style count
  189. return bless($self, $package);
  190. }
  191. ### methods for access to ontology entries
  192. # retrieve the entity from ontology
  193. # expects a string representing entity names, e.g., 'cat:entity'
  194. # returns a reference to hash array directly in the datastore or
  195. # undef if entity not found (no error is raised)
  196. sub get {
  197. my $self = shift;
  198. my $ent = shift;
  199. return $self->{'ent'}->{$ent};
  200. }
  201. # lists all entities in the ontology
  202. # TODO: add the condition to select entities (use same rules as in assigning styles)
  203. # a list of strings representing entities is returned (in the form type:entity)
  204. sub all {
  205. my $self = shift;
  206. return keys %{$self->{'ent'}};
  207. }
  208. # adds a new entity to ontology
  209. # arguments
  210. # entity name in the for type:name, e.g.: cat:entity, required
  211. # section (may be empty)
  212. # source file (may be empty)
  213. # line in the source file (must be empty if source file is empty)
  214. # returns
  215. # a reference to the hash array containing the entity
  216. # fails
  217. # if entity already exists
  218. # if entity name is not parseable
  219. # if section has not been created before
  220. sub add {
  221. my ($self, $ent, $section, $src_file, $src_line) = (@_);
  222. croak "Attempt to create an existing entity ($ent)" if $self->get($ent);
  223. $ent =~ m/^([^:]+):(\S+)$/o ||
  224. croak "Unable to parse entity name: $ent (should be in form 'type:name') [$src_file:$src_line]";
  225. my ($type,$id) = ($1, $2);
  226. $type =~ m/^(cat|inst)$/ ||
  227. croak "Unrecognized entity type: $type (should be cat or inst) [$src_file:$src_line]";
  228. my %e = ( 'id' => $id, 'type' => $type );
  229. if ($section) {
  230. confess "Section $section was not found in ontology while creating entity $ent"
  231. unless exists $self->{'sections'}->{$section};
  232. $e{'section'} = $section;
  233. $self->{'sections'}->{$section}->{'members'}->{$ent} = 1;
  234. }
  235. if ($src_file) {
  236. $e{'src'} = [ $src_file ];
  237. $e{'src_line'} = [ $src_line ] ||
  238. croak "No source line provided for entity $ent";
  239. } else {
  240. $e{'src'} = [];
  241. $e{'src_line'} = [];
  242. carp "Source file not specified for '$ent'" if $self->{'opts'}->{'verbose'};
  243. }
  244. return $self->{'ent'}->{$ent} = \%e;
  245. }
  246. # updates an existing entity in ontology (fails if entity does not exists)
  247. # arguments
  248. # entity name in the for type:name, e.g.: cat:entity, required
  249. # section (may be empty)
  250. # source file (may be empty)
  251. # line in the source file (must be empty if source file is empty)
  252. # returns
  253. # a reference to the hash array containing the entity
  254. # fails
  255. # if entity does not exist
  256. # if entity name is not parseable
  257. # if section has not been created before
  258. sub update {
  259. my ($self, $ent, $section, $src_file, $src_line) = (@_);
  260. my $e = $self->get($ent) ||
  261. croak "SOFT: entity to be updated was not found ($ent)";
  262. if ($section) {
  263. confess "Section $section was not found in ontology while creating entity $ent"
  264. unless exists $self->{'sections'}->{$section};
  265. if ($e->{'section'} && $e->{'section'} ne $section) {
  266. # if section is different from already specified update other section data and complain if necessary
  267. delete $self->{'sections'}->{$section}->{'members'}->{$ent};
  268. carp "SOFT: Section name update for $ent (".$e->{'section'}." = $section)"
  269. if $self->{'opts'}->{'verbose'};
  270. }
  271. $e->{'section'} = $section;
  272. $self->{'sections'}->{$section}->{'members'}->{$ent} = 1;
  273. }
  274. if ($src_file) {
  275. push @{$e->{'src'}}, $src_file;
  276. push @{$e->{'src_line'}}, $src_line ;
  277. }
  278. return $e;
  279. }
  280. # updates and if it exists, adds a new entity if not
  281. # arguments are passed directly to add or update
  282. sub add_or_update {
  283. my $self = shift;
  284. my $ent = $_[0];
  285. return $self->get($ent) ? $self->update(@_) : $self->add(@_);
  286. }
  287. # deletes an entity from ontology
  288. # arguments:
  289. # entity name in the form cat:name
  290. # returns
  291. # hash array of the deleted entity or undef on failure
  292. sub del {
  293. my $self = shift;
  294. my $ent = shift;
  295. my $e = $self->get($ent) || return undef;
  296. # cleanup relations
  297. $self->{'rel'} = [ grep { $_->{'to'} ne $ent && $_->{'from'} ne $ent } @{$self->{'rel'}} ];
  298. # remove references from sections
  299. foreach my $sec (keys %{$self->{'sections'}}) {
  300. delete $self->{'sections'}->{$sec}->{'members'}->{$ent};
  301. }
  302. # remove the entity
  303. delete $self->{'ent'}->{$ent};
  304. return $e;
  305. }
  306. # retrieve a property value from an entity
  307. # arguments:
  308. # entity name in the form cat:name
  309. # property name
  310. # return
  311. # a string connecting the property value or undef if no such property or
  312. # entity has no properties
  313. # the method will croak if no entity does not exist
  314. sub get_prop {
  315. my ($self, $ent, $prop) = (@_);
  316. my $e = $self->get($ent) ||
  317. croak "Retrieving a property ($prop) from a non-existent entity ($ent) attempted";
  318. return undef unless exists $e->{'properties'};
  319. return $e->{'properties'}->{$prop} if exists $e->{'properties'}->{$prop};
  320. return undef;
  321. }
  322. # retrieve an entity id
  323. # arguments:
  324. # entity name in the form cat:name
  325. # return
  326. # a string connecting an entity id or undef if entity does not exist
  327. sub get_id {
  328. my ($self, $ent) = (@_);
  329. my $e = $self->get($ent) || return undef;
  330. return $e->{'id'};
  331. }
  332. # TODO: del_all delete all entities satisfying certain selection criteria
  333. ### methods for loading ontology files
  334. sub parse_soft {
  335. my $self = shift;
  336. my $fname = shift || croak "File name must be provided";
  337. my $csection_ref = shift || []; # reference to an array of section name under which to include parsed SOFT file
  338. # check for cyclical includes
  339. foreach (@{$self->{includes}}) {
  340. croak "Cyclical include of $fname, include/comprise stack: ".join("<-", @{$self->{includes}}) if m/$fname/;
  341. }
  342. push @{$self->{includes}}, $fname;
  343. # open input file for reading, check the directory name
  344. my $fh = undef;
  345. my $basedir = '';
  346. if ($fname eq '-') {
  347. $fh = \*STDIN;
  348. } else {
  349. $fh = new FileHandle "<$fname";
  350. die "Unable to open '$fname'" unless $fh; # TODO print include stack here
  351. $basedir = dirname($fname);
  352. # fix the basedir name to handle ending / properly
  353. $basedir = '' if $basedir eq '.';
  354. $basedir .= '/' if $basedir && $basedir !~ m|/$|o;
  355. }
  356. my $c = -1; # line counter
  357. my @ditto = (); # array of parsed tokens (up to 3 tokens: ent1 -rel-> ent2) for ditto operation
  358. my $prev_line = undef; # previous line for processing line continuations
  359. my $fsec; # full section name
  360. my @csection = @$csection_ref;
  361. my $addepth = @csection; # additional section depth inherited from upper file
  362. # parse
  363. while (<$fh>) {
  364. chomp;
  365. $c++;
  366. # get rid of leading and trailing space
  367. s/^\s*//o;
  368. s/\s*$//o;
  369. # if there is previous line then prepend it to current line
  370. $_ = "$prev_line $_" if $prev_line;
  371. # if the line ends up in \ the assign it to continuations
  372. if (s/\\$//o) {
  373. $prev_line = $_;
  374. next;
  375. }
  376. $prev_line = undef;
  377. # get rid of comments
  378. s/#.*$//o;
  379. # get rid of empty lines
  380. next if /^\s*$/o;
  381. # process \include and \comprise
  382. if (m/^\\(include|comprise|styles|tuples)\s+([\S]+)(?:\s+(.*))?/o) {
  383. my $op = $1;
  384. my $incname = $2;
  385. my $opts = $3;
  386. if ($op eq 'include') {
  387. $self->parse_soft($basedir.$incname);
  388. @csection = (undef);
  389. } elsif ($op eq 'comprise') {
  390. $self->parse_soft($basedir.$incname, \@csection);
  391. } elsif ($op eq 'styles') {
  392. $self->parse_styles($basedir.$incname);
  393. } elsif ($op eq 'tuples') {
  394. my $lcol=0;
  395. my $ltype='cat';
  396. if ($opts) {
  397. $opts =~ m/^(\w+):(cat|inst)/o ||
  398. die "Unable to parse \\tuples directive on line $c in file $fname";
  399. $lcol = $1;
  400. $ltype = $2;
  401. }
  402. $self->load_tuples( $basedir.$incname, $lcol, $ltype );
  403. } else {
  404. croak "Directive $_ not recognized on line $c in file $fname";
  405. }
  406. @ditto = ();
  407. next;
  408. }
  409. # Process Section
  410. if (m/^(\[+)([^\[\]]+)/o) {
  411. my $d = length($1) - 1 + $addepth;
  412. splice @csection, $d if @csection > $d;
  413. $csection[$d] = $2;
  414. # record full section name into section list
  415. $fsec = join '|', @csection;
  416. $self->{'sections'}->{$fsec} = { 'type'=>'sec', 'id'=>$fsec, 'depth'=>_sec_lev($fsec), 'src'=>[], 'src_line'=>[], 'count'=>0, 'members'=>{} }
  417. unless exists $self->{'sections'}->{$fsec};
  418. push @{ $self->{'sections'}->{$fsec}->{'src'} }, $fname;
  419. push @{ $self->{'sections'}->{$fsec}->{'src_line'} }, $c;
  420. $self->{'sections'}->{$fsec}->{'count'}++;
  421. @ditto = (); # no ditto for sections
  422. next;
  423. }
  424. # process ditto
  425. if (m/~/o) {
  426. croak "Ditto character found but no line to copy from" unless @ditto;
  427. my (@tokens) = (m/^([^~\s]+)\s*([^~\s]*)\s*~/o);
  428. croak "Ditto character found but not able to make sense of it, line #$c in $fname" unless @tokens;
  429. pop @tokens unless $tokens[$#tokens]; # remove last token if it is empty
  430. $_ = join ' ', (@tokens, @ditto[$#tokens+1..$#ditto]);
  431. }
  432. @ditto = split /\s+/;
  433. # cat:texture -subcat-> cat:property
  434. my ( $nosec1, $ent1, $dir_rev, $rel, $dir_fwd, $nosec2, $ent2 ) =
  435. (m/^(\*?)(\S+)\s+(<?)-(\S+)-(>?)\s+(\*?)(\S+)/o);
  436. defined $ent1 || croak "Failed to parse line #$c in file $fname ($_)";
  437. ( $dir_rev || $dir_fwd ) || croak "Undirected relations are not allowed, line #$c ($_)\n";
  438. $self->add_or_update( $ent1, $nosec1 ? undef : $fsec, $fname, $c);
  439. $self->add_or_update( $ent2, $nosec2 ? undef : $fsec, $fname, $c);
  440. # store relation
  441. my $rel_object = $dir_fwd ?
  442. {
  443. 'from' => $ent1,
  444. 'type' => 'rel',
  445. 'id' => $rel,
  446. 'to' => $ent2
  447. } : {
  448. 'to' => $ent1,
  449. 'type' => 'rel',
  450. 'id' => $rel,
  451. 'from' => $ent2
  452. };
  453. $rel_object->{'src'} = [$fname];
  454. $rel_object->{'src_line'} = [$c];
  455. push @{$self->{rel}}, $rel_object;
  456. }
  457. $fh->close() unless $fname eq '-';
  458. pop @{$self->{includes}};
  459. carp "Continuation line not found in $fname line #$c" if $prev_line; # complain if the last line ends with \
  460. }
  461. # parse a style file
  462. # this method only loads styles into {'styles'}->{'by_id'} hash, run build_style_indices before using write_gv
  463. sub parse_styles {
  464. my $self = shift;
  465. my $fname = shift || croak "File name must be provided";
  466. my $fh = new FileHandle "<$fname";
  467. die "Unable to open style file '$fname'" unless $fh;
  468. # extract base directory name if any
  469. my $basedir = dirname($fname);
  470. # fix the basedir name to handle ending / properly
  471. $basedir = '' if $basedir eq '.';
  472. $basedir .= '/' if $basedir && $basedir !~ m|/$|o;
  473. my $c = -1; # line counter
  474. my $prev_line = undef; # previous line for processing line continuations
  475. my $instyle = undef; # flag showing if parser is the style definition section
  476. # parsing variables for style specification
  477. my $style_id = undef; # style identifier
  478. my $ptrn_type = undef; # entity type to which pattern applies, eg. rel:, cat:, ...
  479. my $ptrn = undef; # pattern to match style with applicable entities
  480. my $parent = undef; # entry from which style derives
  481. my $sec_depth = 0; # section depth for section styles
  482. my %style = (); # hash for style object with the following members:
  483. # parse
  484. while (<$fh>) {
  485. chomp;
  486. $c++;
  487. # get rid of leading and trailing space
  488. s/^\s*//o;
  489. s/\s*$//o;
  490. # if there is previous line then prepend it to current line
  491. $_ = "$prev_line $_" if $prev_line;
  492. # if the line ends up in \ the assign it to continuations
  493. if (s/\\$//o) {
  494. $prev_line = $_;
  495. next;
  496. }
  497. $prev_line = undef;
  498. # get rid of comments
  499. s/^#.*$//o;
  500. # get rid of empty lines
  501. next if /^\s*$/o;
  502. unless ($instyle) {
  503. # \style[=(rel|cat|inst|sec):ID] (rel|cat|inst|sec):pattern [extends (rel|cat|inst|sec):<other_style_ID>]
  504. my $idre = qr/(?:rel|cat|inst|sec):[^\s]*/o; # regex for entry ID
  505. if (m/^\\style(?:=($idre))?\s+($idre)(?:\s+extends\s+($idre))?/o) {
  506. $style_id = $1 || $2; # style id defaults to pattern if not specified, prepended with type, e.g. cat:id
  507. ( $ptrn_type, $ptrn ) = split ':', $2, 2;
  508. $parent = $3;
  509. # extract section depth if present
  510. if ($ptrn_type eq 'sec') {
  511. $ptrn =~ m/^(?:(\d+):)(.+)$/o;
  512. $sec_depth = $1 || 0;
  513. $ptrn = $2;
  514. }
  515. # insert basedir name into assignment-by file name pattern
  516. $ptrn =~ s/^@/\@$basedir/ if $ptrn && $basedir;
  517. %style = (
  518. 'id' => $style_id,
  519. 'seq' => $self->{'styles'}->{'count'}++,
  520. 'type' => $ptrn_type,
  521. 'ptrn' => $ptrn || '',
  522. 'attrs' => {},
  523. 'src' => $fname,
  524. 'src_line' => $c
  525. );
  526. $style{'depth'} = $sec_depth if $ptrn_type eq 'sec';
  527. $style{'parent'} = $parent if $parent;
  528. if ($parent) {
  529. die "Superstyle not found for line $c in $fname ($_)" unless exists $self->{'styles'}->{'by_id'}->{$parent};
  530. $style{'attrs'} = { %{$self->{'styles'}->{'by_id'}->{$parent}->{'attrs'}} };
  531. }
  532. $instyle = 1;
  533. } else { # not inside style defintion
  534. die "Failed to parse style header line $c in file $fname ($_)";
  535. }
  536. } else {
  537. if (m/^\\style\s*$/o) { # end of style defintion found
  538. # all styles has to be added to by_id index
  539. $self->{'styles'}->{'by_id'}->{$style_id} = {%style};
  540. # reset parsing
  541. %style = ();
  542. $instyle = undef;
  543. } elsif (m/([^=]+)=(.*)/o) { # parse style attributes
  544. $style{'attrs'}->{$1} = $2;
  545. } else { # hui znaet chto
  546. croak "Failed to parse style attribute line $c in file $fname ($_)";
  547. }
  548. }
  549. }
  550. die "Unfinished style defintion at the end of file $fname" if $instyle;
  551. $fh->close();
  552. }
  553. # static method that creates a regexhash
  554. sub _create_regexhash {
  555. my %by_regex;
  556. tie %by_regex, 'Tie::RegexpHash';
  557. return \%by_regex;
  558. }
  559. # builds style indices
  560. sub build_style_indices {
  561. my $self = shift;
  562. # fill in the appropriate indexes in the styles tables
  563. # styles has to be added to regex hash in the order opposite to which it has been loaded
  564. foreach my $s (sort { $b->{'seq'} <=> $a->{'seq'} } values %{$self->{'styles'}->{'by_id'}}) {
  565. #print "Style loaded: ".$s->{'id'}." #".$s->{'seq'}." from ".$s->{'src'}." line ".$s->{'src_line'}."\n";
  566. $s->{'ptrn'} ||= ''; # some of the styles may have patter undefined, in that case substitute is with
  567. # several patterns can be concatenated with a comma, each of the concatenated patterns will be added to corresponding index
  568. foreach (reverse split ',', $s->{'ptrn'}) {
  569. if (m/^\[(.+)\]$/o) { # assignment by section
  570. # create an index hash if doe not exist
  571. my $ind_name = $s->{'type'}.'_'.'sect';
  572. $self->{'styles'}->{$ind_name} = _create_regexhash ($self, $ind_name)
  573. unless exists $self->{'styles'}->{$ind_name};
  574. $self->{'styles'}->{$ind_name}->{ qr/(?:^|\|)$1(?:$|\|)/ } = $s;
  575. } elsif (m/^@(\S+)/o) { # assignment by source
  576. # create an index hash if doe not exist
  577. my $ind_name = $s->{'type'}.'_src';
  578. $self->{'styles'}->{$ind_name} = {}
  579. unless exists $self->{'styles'}->{$ind_name};
  580. # check if the source files exists and is readable
  581. -f $1 || croak "Unable to find file '$1' specified in style ".$s->{'id'}." loaded from ".$s->{'src'}." line ".$s->{'src_line'};
  582. $self->{'styles'}->{$ind_name}->{$1} = $s;
  583. } elsif (m/<?-\w+->?/o) { # assignment by relation
  584. # create an index hash if doe not exist
  585. my $ind_name = $s->{'type'}.'_rel';
  586. $self->{'styles'}->{$ind_name} = {}
  587. unless exists $self->{'styles'}->{$ind_name};
  588. $self->{'styles'}->{$ind_name}->{$_} = $s;
  589. } elsif ( ! m/^\/.+\/$/o ) { # everything else that is not regex
  590. # convert glob symbols to regex
  591. s/\*/.*/og;
  592. s/\?/.?/og;
  593. # convert it into a trivial "match exact word" regex
  594. $_ = '/^'.$_.'$/';
  595. }
  596. if (m/^\/.+\/$/o) { # then this is regex
  597. # create an index hash if doe not exist
  598. my $ind_name = $s->{'type'}.'_regex';
  599. $self->{'styles'}->{$ind_name} = _create_regexhash ($self, $ind_name)
  600. unless exists $self->{'styles'}->{$ind_name};
  601. my $qr = eval "qr$_";
  602. $@ && croak "Failed to parse a regex in $_ ($@)";
  603. $self->{'styles'}->{$ind_name}->{ $qr } = $s;
  604. }
  605. }
  606. }
  607. }
  608. # load tuples
  609. # arguments are
  610. # file name to load from
  611. # a column (name or number from 0) to link from
  612. # type on entities to link to (cat or inst)
  613. # hash with options
  614. # - header=1 the tuple file contains a header
  615. sub load_tuples {
  616. my ($self, $fname, $lcol, $ltype, $opts) = (@_);
  617. confess "No tuple file name has been provided" unless $fname;
  618. confess "Linking column and/or linking entity type has not been defined for tuple file '$fname'"
  619. unless defined $lcol && $ltype;
  620. $opts = $self->{'opts'} unless $opts;
  621. $opts = $self->{'opts'} unless $opts;
  622. $opts->{'header'} = 1 unless exists $opts->{'header'};
  623. my $fh = new FileHandle "<$fname";
  624. die "Unable to open tuple file '$fname'" unless $fh;
  625. binmode $fh, ':encoding(UTF-8)';
  626. my $csv = Text::CSV->new();
  627. my $c = -1; # line counter
  628. my $prev_line = undef; # previous line for processing line continuations
  629. my @prop_names = (); # a list of property names
  630. # parse
  631. while (<$fh>) {
  632. chomp;
  633. $c++;
  634. # get rid of leading and trailing space
  635. s/^\s*//o;
  636. s/\s*$//o;
  637. # if there is previous line then prepend it to current line
  638. $_ = "$prev_line $_" if $prev_line;
  639. # if the line ends up in \ the assign it to continuations
  640. if (s/\\$//o) {
  641. $prev_line = $_;
  642. next;
  643. }
  644. $prev_line = undef;
  645. # get rid of comments
  646. s/^#.*$//o;
  647. # get rid of empty lines
  648. next if /^\s*$/o;
  649. my @columns;
  650. if ($csv->parse($_)) {
  651. @columns = $csv->fields();
  652. } else {
  653. my $err = $csv->error_input;
  654. print "Failed to parse line $c in the CSV file $fname: $err";
  655. }
  656. # setting property names, reading column name from the file
  657. unless (@prop_names) {
  658. if ($opts->{'header'}) { # first line contains column names
  659. @prop_names = @columns;
  660. if ($lcol !~ /^\d+$/o) { # if linking column is not a number then find it in the list of column names
  661. my $icol = 0;
  662. foreach (@prop_names) {
  663. last if m/$lcol/;
  664. $icol++;
  665. }
  666. croak "Unable to find link column name ($lcol) in the header of the csv file $fname" if $icol > $#prop_names;
  667. $lcol = $icol;
  668. } else {
  669. # check that $lcol is within the range
  670. croak "Linking column number ($lcol) is large than the number of columns in the file header ".scalar(@prop_names)
  671. if $lcol - 1 > @prop_names;
  672. }
  673. next;
  674. } else { # no column names in the first line -- simply use numbers
  675. @prop_names = (0..$#columns);
  676. croak "Link column must be a number unless csv file has column names in the first line (current link column $lcol)" unless $lcol =~ /^\d+$/o;
  677. }
  678. }
  679. print STDERR "WARNING: line $c in $fname has different number of columns than the file header (".scalar(@columns)." found but header had ".scalar(@prop_names)." columns)\n" unless
  680. $#columns == $#prop_names;
  681. croak "Zero-length string in link column on line $c in $fname" unless $columns[$lcol];
  682. # creating a new entity or checking the existing one
  683. my $e = $self->add_or_update("$ltype:".$columns[$lcol], undef, $fname, $c );
  684. # create or increase tuple count
  685. if ($e->{'tuple_count'}) {
  686. $e->{'tuple_count'}++;
  687. } else {
  688. $e->{'tuple_count'} = 1;
  689. }
  690. # loading values into properties
  691. # TODO: move to property handling functions
  692. $e->{'properties'} = {} unless exists $e->{'properties'};
  693. foreach my $i (0..$#columns) {
  694. last if $i > $#prop_names; # ignore remaining columns
  695. $e->{'properties'}->{$prop_names[$i]} = $columns[$i];
  696. }
  697. } # end of parsing loop
  698. $fh->close();
  699. }
  700. # exclude entities from the ontology
  701. # TODO: exclude by pattern like in style assignment
  702. sub exclude {
  703. my $self = shift;
  704. foreach (@_) { # expects an array of entities to be exclude in the form cat:category, rel:rel1, ....
  705. if (m/^rel:(.+)/o) {
  706. $self->{'rel'} = [ grep { $_->{'id'} ne $1 } @{$self->{'rel'}} ];
  707. } else {
  708. # entities
  709. $self->del($_) ||
  710. print STDERR "WARNING: exclude entity '$_' was not found in ontology (ignored)\n";
  711. }
  712. }
  713. }
  714. # exclude entities from the ontology that are not in the provided list
  715. # TODO: include by pattern like in style assignment
  716. sub only {
  717. my $self = shift;
  718. my %ecats = (%{$self->{'ent'}}); # list of categories to exclude
  719. my $have_cats = 0;
  720. foreach (@_) { # expects an array of entities to be exclude in the form cat:category, rel:rel1, ....
  721. if (m/^rel:(.+)/o) {
  722. $self->{'rel'} = [ grep { $_->{'id'} eq $1 } @{$self->{'rel'}} ];
  723. } else {
  724. delete $ecats{$_} if exists $ecats{$_};
  725. $have_cats = 1;
  726. }
  727. }
  728. $self->exclude(keys %ecats) if $have_cats;
  729. }
  730. # remove entities that do not have any relations
  731. sub kill_orphans {
  732. my $self = shift;
  733. my @orphans = ();
  734. ENT: foreach my $k (keys %{$self->{'ent'}}) {
  735. foreach my $r (@{$self->{'rel'}}) {
  736. next ENT if ($r->{'to'} eq $k || $r->{'from'} eq $k);
  737. }
  738. push @orphans, $k;
  739. }
  740. $self->exclude( @orphans );
  741. }
  742. # static method that returns section level number
  743. sub _sec_lev {
  744. my $s = shift || return 0;
  745. return ($s =~ tr/|/|/) + 1;
  746. }
  747. # assign colors to the entities according the section
  748. sub colorize_by_section {
  749. my $self = shift;
  750. my $method = shift || confess "Section coloring method not specified in the function call\n";
  751. # method can be either 'rainbow' or 'random'
  752. my $upper = shift || ''; # upper section name, must be empty on the 1st run
  753. my $hue0 = shift || 0.0; # interval of hues on which to operate on
  754. my $hue1 = shift || 360.0;
  755. # count level as the number of section delimiter '|' in the upper section name
  756. my $lev = _sec_lev($upper);
  757. # on the 1st run calculate the number of levels
  758. unless ($lev) {
  759. $self->{'sec_legend'} = {} unless exists $self->{'sec_legend'};
  760. $self->{'_maxlev'} = 0;
  761. foreach (keys %{$self->{'sections'}}) {
  762. my $lev2 = _sec_lev($_);
  763. $self->{'_maxlev'} = $lev2 if $lev2 > $self->{'_maxlev'};
  764. }
  765. }
  766. my @sec_list = sort grep { (_sec_lev($_) == $lev + 1 && m/^$upper/o)} keys %{$self->{'sections'}};
  767. if (@sec_list) {
  768. my $hue_inc = ($hue1 - $hue0) / @sec_list;
  769. for(my $i=0; $i < @sec_list; $i++ ) {
  770. # create color for current section
  771. my $hue = $hue0;
  772. if ( $method eq 'random' ) {
  773. $hue = rand 360.0; #$hue0 + $i * $hue_inc;
  774. } elsif ( $method eq 'rainbow' ) {
  775. $hue = $hue0 + $i * $hue_inc;
  776. } else {
  777. confess "Unknown section coloring method '$method'\n";
  778. }
  779. my $sat = ($lev+1)/$self->{'_maxlev'} / 2.0;
  780. my $color = Convert::Color::HSV->new( $hue, $sat, 1.0 );
  781. #print STDERR $color->as_rgb8->hex." H=$hue S=$sat V=1.0\n";
  782. $self->{'sec_legend'}->{$sec_list[$i]} = $color->as_rgb8->hex;
  783. #$self->{'sec_legend'}->{$sec_list[$i]} = "H=$hue S=$sat V=1.0";
  784. # assign style for each entity in the section
  785. foreach my $ent (keys %{$self->{'sections'}->{$sec_list[$i]}->{'members'}}) {
  786. $self->{'ent'}->{$ent}->{'style'} = {
  787. 'style' => 'filled',
  788. 'fillcolor' => "#".$color->as_rgb8->hex
  789. } if exists $self->{'ent'}->{$ent};
  790. }
  791. # do subsections
  792. $self->colorize_by_section( $method, $sec_list[$i], $hue0 + $i * $hue_inc, $hue0 + $i * $hue_inc + $hue_inc);
  793. }
  794. }
  795. }
  796. # write graphviz gv file
  797. # options are file name and a hash with options
  798. # the options are:
  799. # gvopts -- string with GV language options
  800. # sectotl -- render section outlines (1/0)
  801. # properties -- show properties on the entity output
  802. # comments -- a string that will be printed in the beginning on the GV file as a GV language comment
  803. sub write_gv {
  804. my $self = shift;
  805. my $fname = shift || croak "File name must be provided";
  806. my $opts = shift || { 'sectotl' => 1 }; # second argument is hash with options
  807. confess "No styles indices found while attempting to write a gv file"
  808. unless exists $self->{'styles'}->{'by_id'};
  809. my $fh = undef;
  810. if ($fname eq '-') {
  811. $fh = \*STDOUT;
  812. } else {
  813. $fh = new FileHandle ">$fname" || die "Unable to open '$fname' for writing";
  814. }
  815. binmode $fh, ':encoding(UTF-8)';
  816. print $fh "/* This file was auto-generated on ".localtime()."\n using SOFT.pm library version $VERSION. ";
  817. print $fh $opts->{'comments'}."\n" if ($opts->{'comments'});
  818. print $fh "*/\n";
  819. print $fh "digraph G {\n";
  820. print $fh "\t".$opts->{'gvopts'}."\n" if exists $opts->{'gvopts'} && $opts->{'gvopts'};
  821. if (exists $opts->{'sectotl'} && $opts->{'sectotl'}) {
  822. # output section boxes if requested
  823. # TODO omit sections with no entities in any subsection
  824. print $fh "\t/* subgraphs */";
  825. my $ck = 0;
  826. my $pd = -1;
  827. foreach my $k (sort keys %{$self->{sections}}) {
  828. #print STDERR "$k\n";
  829. my $cd = ($k =~ tr/|//); # current depth
  830. $k =~ m/([^|]+)$/o;
  831. my $label = $1;
  832. foreach my $l (reverse $cd..$pd) {
  833. print $fh "\t" x $l . "\t}\n";
  834. }
  835. # TODO replace with make_style
  836. print $fh "\n" . "\t" x $cd . "\tsubgraph cluster".$ck++." {\n";
  837. print $fh "\t" x $cd . "\t\t".$self->make_style( $self->{'sections'}->{$k} ).";\n";
  838. print $fh join "", map { "\t" x $cd . "\t\t\"$_\";\n" } keys %{$self->{'sections'}->{$k}->{'members'}}
  839. if keys %{$self->{'sections'}->{$k}->{'members'}};
  840. $pd = $cd;
  841. }
  842. foreach my $l (reverse 0..$pd) {
  843. print $fh "\t" x $l . "\t}\n";
  844. }
  845. print $fh "\n";
  846. }
  847. print $fh "\t/* node attributes */\n";
  848. foreach my $key (sort $self->all()) {
  849. print $fh "\t\"$key\" [".$self->make_style( $self->get($key) )."];\n";
  850. }
  851. print $fh "\n";
  852. print $fh "\t/* relations with attributes */\n";
  853. foreach my $r (@{$self->{rel}}) {
  854. print $fh "\t\"".$r->{'from'}."\" -> \"".$r->{'to'}."\" [".$self->make_style($r)."];\n";
  855. }
  856. print $fh "}\n";
  857. $fh->close() unless $fname eq '-';
  858. }
  859. # creates a GV style string given an entity or relation
  860. sub make_style {
  861. my $self = shift;
  862. my $entry = shift; # entity, relation, or section object
  863. my $id = $entry->{'id'};
  864. my $type = $entry->{'type'};
  865. confess "An argument to make_style does not have id and/or type property\n".Dumper($entry)
  866. unless $id && $type;
  867. # special processing for section:
  868. # extract section depth
  869. # search for matching style, first try exact match of the entity id
  870. my $style_ref =
  871. $self->{'styles'}->{'by_id'}->{"$type:$id"} ||
  872. $self->{'styles'}->{'by_id'}->{"$type:"};
  873. confess "Internal error: entity type $type:$id was not found in the style array." unless $style_ref;
  874. # special processing for sections (checking for depth)
  875. if ($type eq 'sec') {
  876. my $style_ref2 = $self->{'styles'}->{'by_id'}->{"$type:"._sec_lev($id).':'};
  877. $style_ref = $style_ref2 if ($style_ref2 && $style_ref->{'seq'} < $style_ref2->{'seq'});
  878. }
  879. # second try available search indices for the entity type
  880. foreach (qw/regex sect src rel/) { # for each index type
  881. last if $style_ref->{'seq'} >= $self->{'styles'}->{'count'};
  882. my $indx = $self->{'styles'}->{ $type.'_'.$_ } || next;
  883. # should be different for different index types
  884. my $style_found = undef;
  885. if (m/regex/o) {
  886. $style_found = $indx->{$id};
  887. } elsif (m/sect/o) {
  888. next unless $entry->{'section'}; # sections may be empty
  889. $style_found = $indx->{$entry->{'section'}};
  890. } elsif (m/src/o) {
  891. $style_found = ( # get the first style
  892. sort { $b->{'seq'} <=> $a->{'seq'} } # sort the styles in reverse of the sequence they were loaded
  893. grep {defined} # get rid of undefined entries for which styles do not exist
  894. map { $indx->{$_} } # find each source file in the style index
  895. @{ $entry->{'src'} } # retrieve a list of all source files in which the entry has been found
  896. )[0];
  897. } elsif (m/rel/o) {
  898. $style_found = ( # get the first style
  899. sort { $b->{'seq'} <=> $a->{'seq'} } # sort the styles in reverse of the sequence they were loaded
  900. grep {defined} # get rid of undefined entries for which styles do not exist
  901. map { $indx->{$_} } ( # look in the index for styles that have specified relation
  902. # record relation the same way it may be recorded in the style definiton,
  903. # make two entries: first is the relation name and second is the relation with a category on one of the ends
  904. (map { ( '<-'.$_->{'id'}.'-', '<-'.$_->{'id'}.'-'.$self->{'ent'}->{$_->{'from'}}->{'type'}.':'.$_->{'from'} ) }
  905. grep { $_->{'to'} eq $id } # grep relations that have current entities on the to side
  906. @{ $self->{'rel'} }),
  907. (map { ( '-'.$_->{'id'}.'->', '-'.$_->{'id'}.'->'.$self->{'ent'}->{$_->{'to'}}->{'type'}.':'.$_->{'to'} ) }
  908. grep { $_->{'from'} eq $id } # grep relations that have current entities on the from side
  909. @{ $self->{'rel'} })
  910. )
  911. )[0];
  912. } else {
  913. confess "oops! something really odd happened at ".__FILE__.':'.__LINE__."!!!";
  914. }
  915. next unless $style_found;
  916. next if ($type eq 'sec' && $style_found->{'depth'} && $style_found->{'depth'} != _sec_lev($id));
  917. $style_ref = $style_found if ($style_found->{'seq'} > $style_ref->{'seq'});
  918. }
  919. my %attrs = %{ $style_ref->{'attrs'} };
  920. # combine with the entry-specific style overwriting default values
  921. if (exists $entry->{'style'}) {
  922. while ( my($k, $v) = each (%{$entry->{'style'}})) {
  923. $attrs{$k} = $v;
  924. }
  925. }
  926. # clean %style of property-specific attributes
  927. while ( my($k, $v) = each (%attrs) ) {
  928. if (exists $entry->{'properties'} && $k =~ m/^\~(.+)/o) {
  929. $attrs{$1} = $v;
  930. }
  931. delete $attrs{$k} if $k =~ m/^~/o;
  932. }
  933. # assemble GV style string
  934. my @s = ();
  935. while (my ($k, $v) = each(%attrs)) {
  936. # replace iterator templates
  937. $v =~ s/%([^%]*)%([^%]*)%/$self->exp_iterator($1, $2, $entry)/ge;
  938. # replace templates
  939. $v =~ s/\@([^\@]*)\@/$self->exp_ent($1, $entry)/ge;
  940. # try to detect HTML formatting for nodes
  941. if ($v =~ m/\<TABLE/io) {
  942. push @s, "$k=<$v>";
  943. } else {
  944. push @s, "$k=\"$v\"";
  945. }
  946. }
  947. # in GV style separator for sections is different from the one for nodes and lines
  948. my $sep = ($type eq 'sec') ? "; " : ',';
  949. my $s = join $sep, @s;
  950. return $s;
  951. }
  952. # expand iterator template
  953. # will repeat the string for each key=val pair in properties
  954. sub exp_iterator {
  955. my ($self, $str, $sep, $entry) = (@_); # var is a template variable, entry is a relation or entity object
  956. return '%' unless $str;
  957. croak "No properties object for iterator template %$str% in ".$entry->{'id'} unless exists $entry->{'properties'};
  958. my @accum = ();
  959. while (my ($k, $v) = each(%{$entry->{'properties'}})) {
  960. my $s = $str;
  961. $s =~ s/\@PNAME\@/$k/g;
  962. my $enc_v = &encode_entities($v);
  963. # TODO: PVAL must be replaced with P:prop_name and the processed in exp_ent
  964. # however, this causes problems with HTML-format <BR>s, need ideas
  965. $s =~ s/\@PVAL\@/$enc_v/g;
  966. push @accum, $s;
  967. }
  968. return join $sep, @accum;
  969. }
  970. # expands templates in the style string
  971. # supported template variables:
  972. # @@ - symbol @
  973. # @ID@ - entity ID
  974. # @ID_STRING@ - entity ID formated into string (_ replaced with \n)
  975. # @P:name@ - the value of property 'name'
  976. sub exp_ent {
  977. my ($self, $var, $entry) = (@_); # var is a template variable, entry is a relation or entity or section object
  978. unless ($var) {
  979. return '@';
  980. } elsif ($var eq "ID") {
  981. return $entry->{'id'};
  982. } elsif ($var eq "ID_STRING") {
  983. my $label = $entry->{'id'};
  984. $label =~ s/([a-z0-9]{2,})([A-Z])/$1\\n$2/sgo; # split lines inside camel case words
  985. $label =~ s/_/\\n/sgo; # split line on underscores
  986. $label=~ s/\\n(\d)/ $1/g; # remove new lines after numbers
  987. return encode_entities($label);
  988. } elsif ($var =~ m/^P:(\w+)/o) {
  989. croak "No properties object for template variable $var in ".$entry->{'id'} unless exists $entry->{'properties'};
  990. if (exists $entry->{'properties'}->{$1}) {
  991. $Text::Wrap::columns=30;
  992. $Text::Wrap::separator='<BR/>';
  993. # this is the way to properly break lines with HTML encoded characters
  994. my $str = wrap("", "", $entry->{'properties'}->{$1});
  995. return join '<BR/>', map {encode_entities($_)} split '<BR/>', $str;
  996. } else { # property does not exists
  997. # TODO: create a flag to ignore empty properties
  998. #croak "No property named '$1' in ".$entry->{'id'} unless exists $entry->{'properties'}->{$1} if there is flag to stop;
  999. return '';
  1000. }
  1001. } elsif ($var eq "SECTION") {
  1002. my $sec = ($entry->{'type'} eq 'sec') ? $entry->{'id'} : $entry->{'sec'};
  1003. $sec =~ s/^.+\|//o; # only keep the last portion of the full section name
  1004. $sec =~ s/([a-z0-9]{2,})([A-Z])/$1 $2/go; # insert spaces in CamelCase words
  1005. $sec =~ s/_/ /og; # replace underscore with spaces
  1006. return $sec;
  1007. } else {
  1008. croak "Unknown template variable $var";
  1009. }
  1010. }
  1011. # create a legend and output it into a file
  1012. sub write_legend {
  1013. my $self = shift;
  1014. my $fname = shift || croak "Filehandle must be provided";
  1015. my $opts = shift || {}; # second argument is a hash with options
  1016. $opts->{'content'} = 'sect' unless exists $opts->{'content'} && $opts->{'content'};
  1017. # options: 'content' => 'sect,rel' -- output legend for sections and relations
  1018. my $fh = new FileHandle ">$fname" || die "Unable to open '$fname' for writing the legend";
  1019. croak "SOFT object does not contain section legend" unless exists $self->{'sec_legend'};
  1020. print $fh "digraph G { \n";
  1021. print $fh "\t".$opts->{'gvopts'}."\n" if exists $opts->{'gvopts'} && $opts->{'gvopts'};
  1022. if ( $opts->{'content'} =~ m/\bsect\b/o ) {
  1023. print $fh "\t/* legend for sections */\n";
  1024. my $ck = 0;
  1025. my $pd = -1;
  1026. foreach my $k (sort keys %{$self->{sections}}) {
  1027. #print STDERR "$k\n";
  1028. my $cd = ($k =~ tr/|//); # current depth
  1029. $k =~ m/([^|]+)$/o;
  1030. my $label = $1;
  1031. foreach my $l (reverse $cd..$pd) {
  1032. print $fh "\t" x $l . "\t}\n";
  1033. }
  1034. # remove unprintable characters from the section name
  1035. my $sname = $k;
  1036. $sname =~ s/\W/_/go;
  1037. print $fh "\n" . "\t" x $cd . "\tsubgraph cluster".$ck++." {\n";
  1038. print $fh "\t" x $cd . "\t\tlabel = \"$label\";\n";
  1039. print $fh "\t" x $cd . "\t\tlabelloc = t;\n";
  1040. print $fh "\t" x $cd . "\t\tstyle = filled;\n";
  1041. print $fh "\t" x $cd . "\t\tfillcolor = \"#".$self->{'sec_legend'}->{$k}."\";\n";
  1042. print $fh "\t" x $cd . "\t\t$sname [shape=plaintext,fixedsize=t,height=0.1,width=0.1,label=\"\"]"
  1043. if keys %{$self->{sections}->{$k}};
  1044. $pd = $cd;
  1045. }
  1046. foreach my $l (reverse 0..$pd) {
  1047. print $fh "\t" x $l . "\t}\n";
  1048. }
  1049. print $fh "\n";
  1050. # foreach my $s (sort keys %{$self->{'sec_legend'}}) {
  1051. # if (%{$self->{'sections'}->{$s}}) {
  1052. #
  1053. # my $sname = $s;
  1054. # $sname =~ s/\W/_/go;
  1055. #
  1056. # my $label = $s;
  1057. # $label =~ s/\|/\\n/og;
  1058. # print $fh "\t$sname [shape=box,label=\"$label\",style=filled,fillcolor=\"#".$self->{'sec_legend'}->{$s}."\"];\n";
  1059. # } else {
  1060. # print $fh "\t/* empty section '$s' omitted */\n";
  1061. # }
  1062. # }
  1063. } elsif ( $opts->{'content'} =~ m/\brel\b/o ) {
  1064. print $fh "\t/* legend for relations */\n";
  1065. print $fh "\tuninplemented; \n";
  1066. }
  1067. print $fh "}\n";
  1068. $fh->close();
  1069. }
  1070. # return the counts of the type of entities in the SOFT structure
  1071. sub counts {
  1072. my $self = shift;
  1073. my %c = (
  1074. 'relations' => scalar @{$self->{'rel'}},
  1075. 'entities' => scalar keys %{$self->{'ent'}},
  1076. 'sections' => scalar keys %{$self->{'sections'}},
  1077. );
  1078. return wantarray ? %c : "relation_types=".$c{'relations'}." entities=".$c{'entities'}." sections=".$c{'sections'};
  1079. }
  1080. # dumps the content of the SOFT object on the specified filehandle
  1081. # arguments
  1082. # filename to write into ('-' for STDOUT)
  1083. # a list of options of what to dump (ent[ities], rel[ations], sty[les])
  1084. sub dump {
  1085. my $self = shift;
  1086. my $fname = shift || croak "File name must be provided";
  1087. my $fh = undef;
  1088. if ($fname eq '-') {
  1089. $fh = \*STDOUT;
  1090. } else {
  1091. $fh = new FileHandle ">$fname" || die "Unable to open '$fname' for writing";
  1092. }
  1093. binmode $fh, ':encoding(UTF-8)';
  1094. if (@_) {
  1095. foreach (@_) {
  1096. print $fh Dumper($self) if /^all$/o;
  1097. print $fh Dumper($self->{'ent'}) if /^ent(?:ities)?$/o;
  1098. print $fh Dumper($self->{'rel'}) if /^rel(?:ations)?/o;
  1099. print $fh Dumper($self->{'styles'}) if /^sty(?:les)?/o;
  1100. }
  1101. } else {
  1102. print $fh Dumper($self);
  1103. }
  1104. $fh->close() unless $fname eq '-';
  1105. }
  1106. # list the entities in the soft object
  1107. sub list {
  1108. my ($self, $fname, $opts) = (@_);
  1109. # opts should include separator, format)
  1110. $opts = {} unless $opts;
  1111. $opts->{'sep'} = "\n" unless exists $opts->{'sep'};
  1112. # FIXME: factor out file open into SOFT::Utils
  1113. my $fh = undef;
  1114. if ($fname eq '-') {
  1115. $fh = \*STDOUT;
  1116. } else {
  1117. $fh = new FileHandle ">$fname" || die "Unable to open '$fname' for writing";
  1118. }
  1119. binmode $fh, ':encoding(UTF-8)';
  1120. foreach ($self->all()) {
  1121. # TODO: this all has to be changed to support format string
  1122. m/^cat:(.*)$/ || next;
  1123. print $1.$opts->{'sep'};
  1124. }
  1125. }
  1126. ### ontological functions
  1127. # finds entities that are related to the specified entities through specified relation type
  1128. # the function will find all triplets that satisfy provided arguments ($from, $rel, $to)
  1129. # arguments are understood as cat:ent1 -rel-> cat:ent2
  1130. # 'from' and 'to' arguments may be either entity names or *references* to the arrays of entity names
  1131. # one of 'from' and 'to' may be a question mark, if none is a question mark the existence of the provided relation is tested
  1132. # returns an list of relation objects that satisfy the condition
  1133. # existence of entities is not checked
  1134. sub find {
  1135. my ($self, $from, $rel, $to) = (@_);
  1136. my @r = ();
  1137. confess "One or more of (from, rel, to) is not defined in find function"
  1138. unless defined $from && defined $rel && defined $to;
  1139. confess "Two variables in find function is not supported"
  1140. if ($from eq '?' && $to eq '?');
  1141. if ( $from eq '?' ) {
  1142. $to = [$to] unless ref $to;
  1143. my %idx = map { $_ => 1 } @$to;
  1144. @r = map {$_->{'from'}}
  1145. grep { exists $idx{$_->{'to'}} && $_->{'id'} eq $rel }
  1146. @{$self->{'rel'}};
  1147. } elsif ($to eq '?') {
  1148. $from = [$from] unless ref $from;
  1149. my %idx = map { $_ => 1 } @$from;
  1150. @r = map {$_->{'to'}}
  1151. grep { exists $idx{$_->{'from'}} && $_->{'id'} eq $rel }
  1152. @{$self->{'rel'}};
  1153. } else {
  1154. confess "No variables were specified in find function"
  1155. }
  1156. return @r;
  1157. }
  1158. # finds entities satisfying the relation by traversing a chain of relations
  1159. # satisfying the condition (useful to traverse DAGs)
  1160. # arguments exactly the same as in find
  1161. sub find_traverse {
  1162. my ($self, $from, $rel, $to) = (@_);
  1163. my %found = ();
  1164. if (ref $from) {
  1165. %found = map { $_ => 1 } @$from;
  1166. } elsif (ref $to) {
  1167. %found = map { $_ => 1 } @$to;
  1168. } else {
  1169. croak "No variable specified in find function";
  1170. }
  1171. my $sz;
  1172. do {
  1173. $sz = scalar keys %found;
  1174. $from = [keys %found] unless $from eq '?';
  1175. $to = [keys %found] unless $to eq '?';
  1176. %found = ( %found, map { $_ => 1 } $self->find( $from, $rel, $to ) ) ;
  1177. } while ( scalar keys %found > $sz );
  1178. return keys %found;
  1179. }
  1180. # check (or filter an array) if an entity is a leaf given the relation
  1181. # leaf is understood as: cat:leaf -relation-> cat:not_leaf
  1182. # arguments:
  1183. # relation to check
  1184. # a list of entities to check
  1185. # returns a list of entities or empty list
  1186. sub is_leaf {
  1187. my $self = shift;
  1188. my $rel = shift;
  1189. my @found = ();
  1190. foreach (@_) {
  1191. push @found, $_ unless $self->find( '?', $rel, $_);
  1192. }
  1193. return @found;
  1194. }
  1195. =head1 SYNOPSIS
  1196. Quick summary of what the module does.
  1197. Perhaps a little code snippet.
  1198. use SOFT;
  1199. my $softh = SOFT->new();
  1200. $softh->parse_soft(STDIN);
  1201. $softh->write_gv(STDOUT)
  1202. ...
  1203. =head1 AUTHOR
  1204. "Alex Sorokine", C<< <"SorokinA@ornl.gov"> >>
  1205. =head1 BUGS
  1206. Please report any bugs or feature requests to C<bug-soft at rt.cpan.org>, or through
  1207. the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=SOFT>. I will be notified, and then you'll
  1208. automatically be notified of progress on your bug as I make changes.
  1209. =head1 SUPPORT
  1210. You can find documentation for this module with the perldoc command.
  1211. perldoc SOFT
  1212. You can also look for information at:
  1213. =over 4
  1214. =item * RT: CPAN's request tracker
  1215. L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=SOFT>
  1216. =item * AnnoCPAN: Annotated CPAN documentation
  1217. L<http://annocpan.org/dist/SOFT>
  1218. =item * CPAN Ratings
  1219. L<http://cpanratings.perl.org/d/SOFT>
  1220. =item * Search CPAN
  1221. L<http://search.cpan.org/dist/SOFT/>
  1222. =back
  1223. =head1 ACKNOWLEDGEMENTS
  1224. =head1 COPYRIGHT & LICENSE
  1225. Copyright 2010 Alexandre Sorokine, all rights reserved.
  1226. This program is free software; you can redistribute it and/or modify it
  1227. under the same terms as Perl itself.
  1228. =cut
  1229. 1; # End of SOFT.pm