PageRenderTime 60ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/swignition/Swignition/uF/hMeasure.pm

http://cognition-parser.googlecode.com/
Perl | 558 lines | 457 code | 85 blank | 16 comment | 71 complexity | ef0b7b645494e156144da378afa97533 MD5 | raw file
Possible License(s): LGPL-2.0, GPL-3.0
  1. #!/usr/bin/perl
  2. ######################################################################
  3. package Swignition::uF::hMeasure;
  4. ######################################################################
  5. # This is a complete hodge-podge of a module, but it seems to work.
  6. use CGI::Util;
  7. use Swignition::GenericParser::Utils;
  8. use Swignition::MagicString;
  9. use Swignition::uF;
  10. use POSIX;
  11. use XML::LibXML qw(:all);
  12. use strict;
  13. use utf8;
  14. sub consume
  15. {
  16. my $page = shift;
  17. $page->{uF}->{hMeasure} = [ parse_all($page) ];
  18. if (defined $page->{uF}->{hMeasure}->[0])
  19. {
  20. $page->mergeNS($page->{uF}->{hMeasure}->[0]);
  21. }
  22. foreach my $a (@{ $page->{uF}->{hMeasure} })
  23. {
  24. $a->rdf_subject_merge($page);
  25. }
  26. }
  27. sub parse_all
  28. {
  29. my $page = shift;
  30. my $within = shift || $page->{DOM};
  31. my @rv;
  32. my @nodes1 = searchClass('hmeasure', $within);
  33. my @nodes2 = searchClass('hangle', $within);
  34. my @nodes3 = searchClass('hmoney', $within);
  35. my @nodes = (@nodes1, @nodes2, @nodes3);
  36. foreach my $a (@nodes)
  37. {
  38. next if ($a->getAttribute('class') =~ /\b(tolerance)\b/);
  39. my $A = parse($page, $a);
  40. push @rv, $A;
  41. }
  42. return @rv;
  43. } #/sub parse_all
  44. sub uri
  45. {
  46. my $this = shift;
  47. my $all = shift;
  48. my @rv;
  49. if (length $this->{_id})
  50. {
  51. push @rv, Swignition::uF::TDBURI($this->{_page}->uri.'#'.$this->{_id});
  52. }
  53. if (lc($this->{_dom}->tagName) eq 'body')
  54. {
  55. push @rv, Swignition::uF::TDBURI($this->{_page}->uri);
  56. }
  57. push @rv, Swignition::GenericParser::Utils::BNodeURI($this->{_dom}, 'Measurement')
  58. unless (@rv);
  59. return $rv[0] unless (defined $all);
  60. while ($all) { shift @rv; $all--; }
  61. return @rv;
  62. } #/sub uri
  63. sub qv_uri
  64. {
  65. my $this = shift;
  66. my $all = shift;
  67. my @rv;
  68. push @rv, Swignition::GenericParser::Utils::BNodeURI($this->{_dom}, 'QualifiedValue');
  69. return $rv[0] unless (defined $all);
  70. while ($all) { shift @rv; $all--; }
  71. return @rv;
  72. } #/sub qv_uri
  73. sub t_uri
  74. {
  75. my $this = shift;
  76. my $all = shift;
  77. my @rv;
  78. if (length $this->{tolerance}->{_id})
  79. {
  80. push @rv, Swignition::uF::TDBURI($this->{_page}->uri . '#' . $this->{tolerance}->{_id});
  81. }
  82. elsif ($this->{tolerance}->{_dom})
  83. {
  84. push @rv, Swignition::GenericParser::Utils::BNodeURI($this->{tolerance}->{_dom}, 'Tolerance');
  85. }
  86. else
  87. {
  88. push @rv, Swignition::GenericParser::Utils::BNodeURI($this->{_dom}, 'Tolerance');
  89. }
  90. return $rv[0] unless (defined $all);
  91. while ($all) { shift @rv; $all--; }
  92. return @rv;
  93. } #/sub t_uri
  94. sub item_uri
  95. {
  96. my $this = shift;
  97. my $all = shift;
  98. my @rv;
  99. if ($this->{item_object})
  100. {
  101. return $this->{item_object}->uri($all);
  102. }
  103. if (length $this->{item_link})
  104. {
  105. push @rv, Swignition::uF::TDBURI($this->{item_link});
  106. }
  107. if (length $this->{item_dom}->getAttribute('id'))
  108. {
  109. push @rv, Swignition::uF::TDBURI($this->{_page}->uri . '#' . $this->{item_dom}->getAttribute('id'));
  110. }
  111. push @rv, Swignition::GenericParser::Utils::BNodeURI($this->{item_dom}, 'MeasuredThing')
  112. unless (@rv);
  113. return $rv[0] unless (defined $all);
  114. while ($all) { shift @rv; $all--; }
  115. return @rv;
  116. } #/sub t_uri
  117. sub dim_uri
  118. {
  119. my $this = shift;
  120. return 'http://purl.org/commerce#costs'
  121. if ($this->{class} eq 'hmoney' && !length $this->{type});
  122. return unless (length $this->{type});
  123. my $dimension = lc($this->{type});
  124. $dimension =~ s/\s+/ /g;
  125. $dimension =~ s/[^a-z0-9 ]//g;
  126. $dimension =~ s/ ([a-z])/uc($1)/ge;
  127. return 'http://buzzword.org.uk/rdf/measure#'.$dimension;
  128. } #/sub dim_uri
  129. sub rdf_subject_merge
  130. {
  131. my $this = shift;
  132. my $page = shift;
  133. my $val;
  134. my $subject = Swignition::RDFModel::Subject->new($this->uri);
  135. $subject->addObject($this);
  136. $subject->setDom($this->{_dom});
  137. my $val = Swignition::RDFModel::Value->new('http://buzzword.org.uk/rdf/measure-aux#Measurement', 1);
  138. $subject->addVal("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", $val);
  139. my $qv_subject = Swignition::RDFModel::Subject->new($this->qv_uri);
  140. my $val = Swignition::RDFModel::Value->new(
  141. ($this->{class} eq 'hmoney'
  142. ? 'http://purl.org/commerce#Price'
  143. : 'http://buzzword.org.uk/rdf/measure-aux#QualifiedValue')
  144. , 1);
  145. $qv_subject->addVal("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", $val);
  146. $val = Swignition::RDFModel::Value->new($this->qv_uri, 1);
  147. $subject->addVal("http://buzzword.org.uk/rdf/measure-aux#hasValue", $val);
  148. if (length $this->{unit})
  149. {
  150. $val = Swignition::RDFModel::Value->new($this->{unit});
  151. $qv_subject->addVal(
  152. ($this->{class} eq 'hmoney'
  153. ?'http://purl.org/commerce#currency'
  154. :"http://buzzword.org.uk/rdf/measure-aux#unit"),
  155. $val);
  156. }
  157. if (length $this->{num})
  158. {
  159. $val = Swignition::RDFModel::Value->new($this->{num});
  160. $qv_subject->addVal(
  161. ($this->{class} eq 'hmoney'
  162. ?'http://purl.org/commerce#amount'
  163. :"http://www.w3.org/1999/02/22-rdf-syntax-ns#value"),
  164. $val);
  165. }
  166. if ($this->{class} eq 'hangle')
  167. {
  168. $val = Swignition::RDFModel::Value->new($this->{num_label});
  169. $qv_subject->addVal('http://www.w3.org/2000/01/rdf-schema#label', $val);
  170. }
  171. if ($this->{tolerance}->{class} eq 'percentage'
  172. || $this->{tolerance}->{unit} eq '%')
  173. {
  174. $val = Swignition::RDFModel::Value->new($this->{tolerance}->{num}.'%');
  175. $subject->addVal("http://buzzword.org.uk/rdf/measure-aux#hasTolerance", $val);
  176. }
  177. elsif ($this->{tolerance}->{num})
  178. {
  179. my $t_subject = Swignition::RDFModel::Subject->new($this->t_uri);
  180. my $val = Swignition::RDFModel::Value->new(
  181. ($this->{class} eq 'hmoney'
  182. ? 'http://purl.org/commerce#Price'
  183. : 'http://buzzword.org.uk/rdf/measure-aux#Tolerance')
  184. , 1);
  185. $t_subject->addVal("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", $val);
  186. $val = Swignition::RDFModel::Value->new($this->t_uri, 1);
  187. $subject->addVal("http://buzzword.org.uk/rdf/measure-aux#hasTolerance", $val);
  188. if (length $this->{tolerance}->{unit})
  189. {
  190. $val = Swignition::RDFModel::Value->new($this->{tolerance}->{unit});
  191. $t_subject->addVal(
  192. ($this->{class} eq 'hmoney'
  193. ?'http://purl.org/commerce#currency'
  194. :"http://buzzword.org.uk/rdf/measure-aux#unit"),
  195. $val);
  196. }
  197. if (length $this->{tolerance}->{num})
  198. {
  199. $val = Swignition::RDFModel::Value->new($this->{tolerance}->{num});
  200. $t_subject->addVal(
  201. ($this->{class} eq 'hmoney'
  202. ?'http://purl.org/commerce#amount'
  203. :"http://www.w3.org/1999/02/22-rdf-syntax-ns#value"),
  204. $val);
  205. }
  206. $page->mergeSubject($t_subject);
  207. }
  208. if (length $this->dim_uri)
  209. {
  210. $val = Swignition::RDFModel::Value->new($this->dim_uri, 1);
  211. $subject->addVal("http://buzzword.org.uk/rdf/measure-aux#dimension", $val);
  212. }
  213. if ($this->{item_object} || $this->{item_dom})
  214. {
  215. my $item_subject = Swignition::RDFModel::Subject->new($this->item_uri);
  216. $item_subject->addVal(
  217. 'http://buzzword.org.uk/rdf/measure-aux#hasMeasurement',
  218. Swignition::RDFModel::Value->new($this->uri, 1)
  219. );
  220. $subject->addVal(
  221. 'http://buzzword.org.uk/rdf/measure-aux#item',
  222. Swignition::RDFModel::Value->new($this->item_uri, 1)
  223. );
  224. if (length $this->dim_uri)
  225. {
  226. $item_subject->addVal(
  227. $this->dim_uri,
  228. Swignition::RDFModel::Value->new($this->qv_uri, 1)
  229. );
  230. }
  231. if (!$this->{item_object})
  232. {
  233. $item_subject->addVal(
  234. 'http://www.w3.org/2000/01/rdf-schema#label',
  235. Swignition::RDFModel::Value->new($this->{item})
  236. );
  237. }
  238. $page->mergeSubject($item_subject);
  239. }
  240. $page->mergeSubject($subject);
  241. $page->mergeSubject($qv_subject);
  242. } #/sub rdf_subject
  243. sub metadata_ns
  244. {
  245. my $this = shift;
  246. return {
  247. 'commerce' => {
  248. nsurl=>'http://purl.org/commerce#',
  249. title=>'RDF Commerce Vocab'
  250. },
  251. 'measure' => {
  252. nsurl=>'http://buzzword.org.uk/rdf/measure#',
  253. title=>'RDF Measurements Vocab'
  254. },
  255. 'measurex' => {
  256. nsurl=>'http://buzzword.org.uk/rdf/measure-aux#',
  257. title=>'RDF Measurements Vocab (Auxiliary)'
  258. }
  259. };
  260. } #/sub metadata_ns
  261. sub parse
  262. {
  263. my $page = shift;
  264. my $rv = { '_dom'=>shift };
  265. my $pkg = __PACKAGE__;
  266. if (defined $page->{uF}->{_Shortcuts}->{$pkg}->{ $rv->{_dom}->getAttribute('_xpath') })
  267. { return $page->{uF}->{_Shortcuts}->{$pkg}->{ $rv->{_dom}->getAttribute('_xpath') }; }
  268. else
  269. { $page->{uF}->{_Shortcuts}->{$pkg}->{ $rv->{_dom}->getAttribute('_xpath') } = $rv; }
  270. my $root = $rv->{'_dom'}->cloneNode(1);
  271. my @RemoveTheseNodes;
  272. my $id = $root->getAttribute('id');
  273. $rv->{_id} = $id if (length $id);
  274. $rv->{_page} = $page;
  275. Swignition::uF::data_patterns($page, $root, 2);
  276. # Extract embedded hCards
  277. my @nested = searchClass('vcard', $root);
  278. foreach my $h (@nested)
  279. {
  280. if ($h->getAttribute('class') =~ / (^|\s) item (\s|$) /x)
  281. {
  282. $rv->{item_object} = Swignition::uF::hCard::parse($page, $h);
  283. push @RemoveTheseNodes, $h;
  284. last;
  285. }
  286. my $newClass = $h->getAttribute('class');
  287. $newClass =~ s/\b(item)\b//gix;
  288. $h->setAttribute('class', $newClass);
  289. }
  290. # Extract embedded hCalendar events
  291. my @nested = searchClass('vevent', $root);
  292. foreach my $h (@nested)
  293. {
  294. if ($h->getAttribute('class') =~ / (^|\s) item (\s|$) /x)
  295. {
  296. $rv->{item_object} = Swignition::uF::hEvent::parse($page, $h);
  297. push @RemoveTheseNodes, $h;
  298. last;
  299. }
  300. my $newClass = $h->getAttribute('class');
  301. $newClass =~ s/\b(item)\b//gix;
  302. $h->setAttribute('class', $newClass);
  303. }
  304. # Now that we have reached here, no other composite microformats have
  305. # any business being nested within this root element. So in the interest
  306. # of Microformat opacity, let's destroy them.
  307. Swignition::uF::destroyer($root);
  308. # We'll use these regular expressions later.
  309. my $_nonZeroDigit = '[1-9]';
  310. my $_digit = '\d';
  311. my $_natural = "($_nonZeroDigit)($_digit)*";
  312. my $_integer = "(0|(\\-|\x{2212})?($_natural)+)";
  313. my $_decimal = "($_integer)[\\.\\,]($_digit)*";
  314. my $_mantissa = "($_decimal|$_integer)";
  315. my $_sciNumber = "($_mantissa)[Ee]($_integer)";
  316. my $_number = "($_sciNumber|$_decimal|$_integer|\\x{00BC}|\\x{00BD}|\\x{00BE})";
  317. my $_degree = "($_number)(deg|\\x{00b0})";
  318. my $_minute = "($_number)(min|\\x{2032}|\\\')";
  319. my $_second = "($_number)(sec|\\x{2033}|\\\")";
  320. # Type
  321. $rv->{class} = 'hmeasure';
  322. $rv->{class} = 'hangle' if ($root->getAttribute('class') =~ /\b(hangle)\b/);
  323. $rv->{class} = 'hmoney' if ($root->getAttribute('class') =~ /\b(hmoney)\b/);
  324. # Number
  325. my @nodes = searchClass('num', $root);
  326. my $str = STRINGIFY($nodes[0], 'value');
  327. $rv->{num} = $str
  328. if (length $str);
  329. push @RemoveTheseNodes, $nodes[0] if ($nodes[0]);
  330. # Unit
  331. unless ($rv->{class} eq 'hangle')
  332. {
  333. @nodes = searchClass('unit', $root);
  334. $str = STRINGIFY($nodes[0], 'value');
  335. $rv->{unit} = $str
  336. if (length $str);
  337. push @RemoveTheseNodes, $nodes[0] if ($nodes[0]);
  338. }
  339. # Type
  340. @nodes = searchClass('type', $root);
  341. $str = STRINGIFY($nodes[0], 'value');
  342. $rv->{type} = $str
  343. if (length $str);
  344. push @RemoveTheseNodes, $nodes[0] if ($nodes[0]);
  345. # Item
  346. unless ($rv->{item_object})
  347. {
  348. @nodes = searchClass('item', $root);
  349. if (@nodes)
  350. {
  351. my $n = $nodes[0];
  352. my $link;
  353. $str = STRINGIFY($n, 'value');
  354. if (length $n->getAttribute('src'))
  355. { $link = $page->uri( $n->getAttribute('src') ); }
  356. elsif (length $n->getAttribute('href'))
  357. { $link = $page->uri( $n->getAttribute('href') ); }
  358. elsif (length $n->getAttribute('data'))
  359. { $link = $page->uri( $n->getAttribute('data') ); }
  360. $rv->{item} = $str
  361. if (length $str);
  362. $rv->{item_link} = $link
  363. if (length $link);
  364. $rv->{item_dom} = $n;
  365. push @RemoveTheseNodes, $n;
  366. }
  367. }
  368. # Tolerance
  369. @nodes = searchClass('tolerance', $root);
  370. my $str = STRINGIFY($nodes[0], 'value');
  371. if ($str =~ /^\s*($_number)\s*\%\s*$/)
  372. {
  373. $rv->{tolerence} = bless {
  374. 'class' => 'percentage' ,
  375. 'num' => $1 ,
  376. 'unit' => '%'
  377. };
  378. }
  379. elsif ($nodes[0])
  380. {
  381. my $tolerance = parse($nodes[0]);
  382. $rv->{tolerence} = $tolerance
  383. if (length $tolerance->{num});
  384. }
  385. push @RemoveTheseNodes, $nodes[0] if ($nodes[0]);
  386. foreach my $RemoveIt (@RemoveTheseNodes)
  387. { $RemoveIt->parentNode->removeChild($RemoveIt); }
  388. my $str = STRINGIFY($root, 'value');
  389. unless ($rv->{tolerance})
  390. {
  391. my $tol;
  392. ($str, $tol) = split /\x{2213}/, $str;
  393. $str =~ s/(^\s+)|(\s+$)//g;
  394. $tol =~ s/(^\s+)|(\s+$)//g;
  395. if (length $tol)
  396. {
  397. $tol =~ /$_number/;
  398. $rv->{tolerance} = bless {
  399. num => $1 ,
  400. class => $rv->{class}
  401. };
  402. $tol =~ s/$_number//;
  403. $rv->{tolerance}->{unit} = $tol;
  404. }
  405. }
  406. if ($rv->{class} eq 'hangle' && !length $rv->{num})
  407. {
  408. $rv->{num} = $str;
  409. }
  410. elsif (length $rv->{num} && !length $rv->{unit})
  411. {
  412. $rv->{unit} = $str;
  413. if ($rv->{class} eq 'hmoney')
  414. {
  415. $str =~ /(\b[A-Z]{3}\b|\x{20AC}|\x{00A3}|\x{00A5}|\x{0024})/i;
  416. $rv->{unit} = $1 if (length $1);
  417. }
  418. }
  419. elsif (length $rv->{unit} && !length $rv->{num})
  420. {
  421. $str =~ s/\s+//g;
  422. $str =~ /$_number/;
  423. $rv->{num} = $str;
  424. }
  425. elsif (!length $rv->{num} && !length $rv->{unit})
  426. {
  427. $str =~ /$_number/;
  428. $rv->{num} = $1;
  429. $str =~ s/\s*($_number)\s*//;
  430. $rv->{unit} = $str;
  431. if ($rv->{class} eq 'hmoney')
  432. {
  433. $str =~ /(\b[A-Z]{3}\b|\x{20AC}|\x{00A3}|\x{00A5}|\x{0024})/i;
  434. $rv->{unit} = $1 if (length $1);
  435. }
  436. }
  437. if ($rv->{class} eq 'hmoney')
  438. {
  439. $rv->{unit} = 'EUR' if ($rv->{unit} =~ /^\x{20AC}$/);
  440. $rv->{unit} = 'GBP' if ($rv->{unit} =~ /^\x{00A3}$/);
  441. $rv->{unit} = 'JPY' if ($rv->{unit} =~ /^\x{00A5}$/);
  442. $rv->{unit} = 'USD' if ($rv->{unit} =~ /^\x{0024}$/);
  443. }
  444. $rv->{num} =~ s/\,/\./g;
  445. $rv->{num} =~ s/\x{2212}/\-/g;
  446. if ($rv->{class} eq 'hangle')
  447. {
  448. $str = $rv->{num};
  449. $str =~ m/$_degree/; $rv->{num_degree} = $1 if (length $1);
  450. $str =~ m/$_minute/; $rv->{num_minute} = $1 if (length $1);
  451. $str =~ m/$_second/; $rv->{num_second} = $1 if (length $1);
  452. if ($rv->{num_degree} < 0)
  453. {
  454. $rv->{num_minute} *= -1;
  455. $rv->{num_second} *= -1;
  456. }
  457. elsif ($rv->{num_degree} == 0 && $rv->{num_minute} < 0)
  458. {
  459. $rv->{num_second} *= -1;
  460. }
  461. $rv->{num} = $rv->{num_degree}
  462. + ( $rv->{num_minute} / 60 )
  463. + ( $rv->{num_second} / 3600 );
  464. $rv->{num_label} = $str;
  465. }
  466. if ($rv->{class} ne 'hangle' && defined $rv->{tolerance} && !length $rv->{tolerance}->{unit})
  467. {
  468. $rv->{tolerance}->{unit} = $rv->{unit};
  469. }
  470. bless $rv;
  471. return $rv;
  472. } #/sub parse
  473. 1; #/package Swignition::uF::hMeasure