PageRenderTime 49ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 1ms

/v1/mod_semanticweb/arc/extractors/ARC2_RDFExtractor.php

https://code.google.com/p/goodrelations-for-joomla/
PHP | 249 lines | 191 code | 37 blank | 21 comment | 36 complexity | 710bbfe80b94e7c984720285bd3ea66d MD5 | raw file
  1. <?php
  2. /*
  3. homepage: http://arc.semsol.org/
  4. license: http://arc.semsol.org/license
  5. class: ARC2 RDF Extractor
  6. author: Benjamin Nowack
  7. version: 2008-11-18 (Fix: Skip comments. Thanks to Masahide Kanzaki)
  8. */
  9. ARC2::inc('Class');
  /**
   * Base class for ARC2 RDF extractors.
   *
   * Works on the node list and parent-to-children index obtained from the
   * calling component, and offers helpers for walking that tree, testing
   * attributes, serialising node content and minting blank node identifiers.
   * extractRDF() is an empty hook here.
   */
  class ARC2_RDFExtractor extends ARC2_Class {

    /*
     * State filled in by __init(). Declared up front (with `var`, in keeping
     * with the PHP 4/5-compatible style of this file) because creating
     * properties dynamically is deprecated as of PHP 8.2.
     */
    var $nodes;          /* node list returned by $caller->getNodes() */
    var $index;          /* node id => sub-nodes, from $caller->getNodeIndex() */
    var $bnode_prefix;   /* prefix used by createBnodeID() */
    var $bnode_id;       /* counter used by createBnodeID() */
    var $keep_cdata_ws;  /* when truthy, getContent() never trims its result */

    /**
     * Forwards both arguments to the ARC2_Class constructor.
     *
     * $a deliberately has no default: a default on a parameter that precedes
     * a required one can never be used and is deprecated on PHP 8.0+.
     *
     * @param mixed $a      Configuration (read as $this->a in __init(); presumably stored by the parent).
     * @param mixed $caller Calling component, passed by reference (used as $this->caller; presumably stored by the parent).
     */
    function __construct($a, &$caller) {
      parent::__construct($a, $caller);
    }

    /**
     * PHP 4 style constructor; delegates to __construct().
     */
    function ARC2_RDFExtractor($a, &$caller) {
      $this->__construct($a, $caller);
    }

    /**
     * Pulls nodes and index from the caller and applies configuration defaults.
     */
    function __init() {
      parent::__init();
      $this->nodes = $this->caller->getNodes();
      $this->index = $this->caller->getNodeIndex();
      /* configured prefix, or 'arc' + 4 hex chars + 'b' */
      $this->bnode_prefix = $this->v('bnode_prefix', 'arc' . substr(md5(uniqid(rand())), 0, 4) . 'b', $this->a);
      $this->bnode_id = 0;
      $this->keep_cdata_ws = $this->v('keep_cdata_whitespace', 0, $this->a);
      if (!isset($this->a['ns'])) {
        $this->a['ns'] = array('rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
      }
    }

    /* helpers */

    /**
     * Thin wrapper around ARC2::x() with 'si' as default options.
     */
    function x($re, $v, $options = 'si') {
      return ARC2::x($re, $v, $options);
    }

    /**
     * Upper-cases the first character of $v and of every part that follows
     * a '-', '_' or space, removing those separators.
     *
     * @param string $v
     * @return string
     */
    function camelCase($v) {
      $r = ucfirst($v);
      /* the greedy first group makes each pass consume the last separator */
      while (preg_match('/^(.*)[\-\_ ](.*)$/', $r, $m)) {
        $r = $m[1] . ucfirst($m[2]);
      }
      return $r;
    }

    /**
     * Returns the next blank node identifier: '_:' + prefix + running counter.
     *
     * @return string
     */
    function createBnodeID() {
      $this->bnode_id++;
      return '_:' . $this->bnode_prefix . $this->bnode_id;
    }

    /* extraction hook */

    /**
     * Intentionally empty in this class.
     */
    function extractRDF() {
    }

    /* triple forwarding */

    /**
     * Hands every triple in $ts to the caller's addT().
     *
     * @param array $ts
     */
    function addTs($ts) {
      foreach ($ts as $t) {
        $this->caller->addT($t);
      }
    }

    /**
     * Hands a single triple to the caller's addT() and returns its result.
     */
    function addT($t) {
      return $this->caller->addT($t);
    }

    /* tree navigation */

    /**
     * Returns the index entry for $n['id'], or an empty array if there is none.
     *
     * @param array $n Node.
     * @return array
     */
    function getSubNodes($n) {
      return $this->v($n['id'], array(), $this->index);
    }

    /**
     * Returns the node stored under $n['p_id'], or 0 when the node carries no
     * parent id or the id is unknown.
     *
     * @param array $n Node.
     * @return array|int
     */
    function getParentNode($n) {
      if (isset($n['p_id']) && isset($this->nodes[$n['p_id']])) {
        return $this->nodes[$n['p_id']];
      }
      return 0;
    }

    /* class-based lookups */

    /**
     * Collects matching nodes depth-first. A matching node is returned as is
     * and its descendants are not searched any further.
     *
     * @param array        $n         Start node.
     * @param string|array $cls       Class name, or list of names (any match counts).
     * @param int          $skip_self When truthy, $n itself is not tested.
     * @return array List of nodes, possibly empty.
     */
    function getSubNodesByClass($n, $cls, $skip_self = 0) {
      if (!$skip_self && $this->hasClass($n, $cls)) {
        return array($n);
      }
      $r = array();
      foreach ($this->getSubNodes($n) as $sn) {
        $sub_r = $this->getSubNodesByClass($sn, $cls, 0);
        if ($sub_r) {
          $r = array_merge($r, $sub_r);
        }
      }
      return $r;
    }

    /**
     * Returns the first matching node in depth-first order, or 0.
     *
     * @param array        $n         Start node.
     * @param string|array $cls       Class name(s).
     * @param int          $skip_self When truthy, $n itself is not tested.
     * @return array|int
     */
    function getSubNodeByClass($n, $cls, $skip_self = 0) {
      if (!$skip_self && $this->hasClass($n, $cls)) {
        return $n;
      }
      foreach ($this->getSubNodes($n) as $sn) {
        $sub_r = $this->getSubNodeByClass($sn, $cls, 0);
        if ($sub_r) {
          return $sub_r;
        }
      }
      return 0;
    }

    /**
     * Walks up the parent chain and returns the nearest matching node, or 0.
     *
     * @param array        $n         Start node.
     * @param string|array $cls       Class name(s).
     * @param int          $skip_self When truthy, $n itself is not tested.
     * @return array|int
     */
    function getParentNodeByClass($n, $cls, $skip_self = 0) {
      if (!$skip_self && $this->hasClass($n, $cls)) {
        return $n;
      }
      $pn = $this->getParentNode($n);
      if ($pn) {
        $sub_r = $this->getParentNodeByClass($pn, $cls, 0);
        if ($sub_r) {
          return $sub_r;
        }
      }
      return 0;
    }

    /* attribute tests */

    /**
     * Tests whether the value list stored under "$a m" in the node's
     * attributes shares at least one entry with $v.
     *
     * @param string       $a Attribute name.
     * @param array        $n Node.
     * @param string|array $v Value or list of values.
     * @return int 1 on a match, otherwise 0.
     */
    function hasAttribute($a, $n, $v) {
      $vs = is_array($v) ? $v : array($v);
      $a_vs = $this->v($a . ' m', array(), $n['a']);
      return array_intersect($vs, $a_vs) ? 1 : 0;
    }

    /**
     * hasAttribute() for 'class'.
     */
    function hasClass($n, $v) {
      return $this->hasAttribute('class', $n, $v);
    }

    /**
     * hasAttribute() for 'rel'.
     */
    function hasRel($n, $v) {
      return $this->hasAttribute('rel', $n, $v);
    }

    /* document base */

    /**
     * Returns the root node's 'doc_base', overridden by the href of a
     * 'base' node found among the children of a 'head' child of the root.
     *
     * A 'base' node without a non-empty href is skipped: it must neither
     * wipe the document base nor hide a later 'base' that does carry one.
     *
     * @return mixed
     */
    function getDocBase() {
      $root_node = $this->getRootNode();
      $r = $root_node['doc_base'];
      foreach ($this->getSubNodes($root_node) as $root_child) {
        if ($root_child['tag'] != 'head') {
          continue;
        }
        foreach ($this->getSubNodes($root_child) as $head_child) {
          if ($head_child['tag'] != 'base') {
            continue;
          }
          if (isset($head_child['a']['href']) && $head_child['a']['href'] !== '') {
            $r = $head_child['a']['href'];
            break;
          }
        }
      }
      return $r;
    }

    /* content serialisation */

    /**
     * Returns the text content of a node and its descendants with every run
     * of whitespace collapsed to one space. Comment nodes contribute nothing;
     * an 'img' node without text of its own contributes its alt attribute.
     *
     * @param array $n    Node.
     * @param int   $trim When truthy, the result is trimmed.
     * @return string
     */
    function getPlainContent($n, $trim = 1) {
      if ($n['tag'] == 'comment') {
        $r = '';
      }
      elseif ($n['tag'] == 'cdata') {
        $r = $n['a']['value'];
      }
      /* explicit comparison: a plain truthiness test would discard the text "0" */
      elseif (trim($this->v('cdata', '', $n)) !== '') {
        $r = $n['cdata'];
        foreach ($this->getSubNodes($n) as $sub_n) {
          $r .= $this->getPlainContent($sub_n, 0);
        }
      }
      elseif (($n['tag'] == 'img') && isset($n['a']['alt'])) {
        $r = $n['a']['alt'];
      }
      else {
        $r = '';
        foreach ($this->getSubNodes($n) as $sub_n) {
          $r .= $this->getPlainContent($sub_n, 0);
        }
      }
      $r = preg_replace('/\s+/', ' ', $r);
      return $trim ? trim($r) : $r;
    }

    /**
     * Serialises a node back to markup.
     *
     * Sub-nodes are always serialised with their own tags; $outer only
     * decides whether $n's own start and end tag are included.
     *
     * @param array $n     Node.
     * @param int   $outer When truthy, wrap the content in $n's own tag.
     * @param int   $trim  When truthy (and keep_cdata_whitespace is off), trim the result.
     * @return string
     */
    function getContent($n, $outer = 0, $trim = 1) {
      if ($n['tag'] == 'comment') {
        $r = '<!-- ' . $n['a']['value'] . ' -->';
      }
      elseif ($n['tag'] == 'cdata') {
        $r = $n['a']['value'];
      }
      else {
        $r = '';
        /* a missing 'empty' flag counts as "not empty" */
        $is_empty = !empty($n['empty']);
        if ($outer) {
          $r .= '<' . $n['tag'];
          asort($n['a']);
          /* default namespace, kept under the '' key of the xmlns entry */
          if (!empty($n['a']['xmlns'][''])) {
            $r .= ' xmlns="' . $n['a']['xmlns'][''] . '"';
          }
          foreach ($n['a'] as $a => $val) {
            /* prefer the "<name> uri" companion entry when there is one */
            if (!is_array($val) && isset($n['a'][$a . ' uri'])) {
              $val = $n['a'][$a . ' uri'];
            }
            /* keys containing whitespace and array values are not written out */
            if (preg_match('/^[^\s]+$/', $a) && !is_array($val)) {
              /* NOTE(review): addslashes() is not markup escaping; kept as is because
                 it is not visible here whether values arrive entity-encoded - confirm */
              $r .= ' ' . $a . '="' . addslashes($val) . '"';
            }
          }
          $r .= $is_empty ? '/>' : '>';
        }
        if (!$is_empty) {
          $r .= $this->v('cdata', '', $n);
          foreach ($this->getSubNodes($n) as $sub_n) {
            $r .= $this->getContent($sub_n, 1, 0);
          }
          if ($outer) {
            $r .= '</' . $n['tag'] . '>';
          }
        }
      }
      return ($trim && !$this->keep_cdata_ws) ? trim($r) : $r;
    }

    /* document identifiers */

    /**
     * Returns the node's 'doc_url', remembered in the caller's cache under
     * 'doc_' + node id on first use.
     *
     * @param array $n Node.
     * @return mixed
     */
    function getDocID($n) {
      $k = 'doc_' . $n['id'];
      if (!isset($this->caller->cache[$k])) {
        $this->caller->cache[$k] = $n['doc_url'];
      }
      return $this->caller->cache[$k];
    }

    /**
     * Returns '_:owner_of_' followed by the normalized document id.
     *
     * @param array $n Node.
     * @return string
     */
    function getDocOwnerID($n) {
      return '_:owner_of_' . $this->normalize($this->getDocID($n));
    }

    /* string normalisation */

    /**
     * Lower-cases $v, strips tags, turns every run of non-word characters
     * into '_', removes each occurrence of 'http', collapses repeated '_'
     * and trims '_' from both ends.
     *
     * @param string $v
     * @return string
     */
    function normalize($v) {
      $v = preg_replace('/[\W\s]+/is', '_', strip_tags(strtolower($v)));
      $v = str_replace('http', '', $v);
      $v = preg_replace('/[\_]+/', '_', $v);
      return trim($v, '_');
    }

  }