PageRenderTime 84ms CodeModel.GetById 29ms RepoModel.GetById 6ms app.codeStats 0ms

/src/main/java/server/helper.php

https://gitlab.com/kedzior/dataset
PHP | 349 lines | 285 code | 39 blank | 25 comment | 61 complexity | 7fae6652446a350751b8ce63acc3784c MD5 | raw file
  1. <?php
  // Location of the Thrift PHP runtime, relative to the directory this script runs from.
  $GLOBALS['THRIFT_ROOT'] = '../curator/demo/thrift';
  // The Thrift includes below are currently disabled; they are kept for reference.
  // require_once $GLOBALS['THRIFT_ROOT'].'/Thrift.php';
  // require_once $GLOBALS['THRIFT_ROOT'].'/protocol/TBinaryProtocol.php';
  // require_once $GLOBALS['THRIFT_ROOT'].'/transport/TSocket.php';
  // require_once $GLOBALS['THRIFT_ROOT'].'/transport/THttpClient.php';
  // require_once $GLOBALS['THRIFT_ROOT'].'/transport/TBufferedTransport.php';
  // require_once $GLOBALS['THRIFT_ROOT'].'/transport/TFramedTransport.php';
  // require_once $GLOBALS['THRIFT_ROOT'].'/packages/curator/Curator.php';
  // The my_ functions below check whether PHP has multibyte (mbstring) support
  // and use the mb_ version of a function when it is available.
  // Note str_replace is multi-byte safe so we don't need a my_ version.
  // The encoding call is guarded the same way: without mbstring an unguarded
  // mb_internal_encoding() would abort the script before any fallback could run.
  if (function_exists('mb_internal_encoding')) {
      mb_internal_encoding("UTF-8");
  }
  // Multibyte-aware substring.
  //   $s      - the string to cut from
  //   $start  - offset of the first character to keep
  //   $length - number of characters to keep; the default "-1" is a sentinel
  //             meaning "everything from $start to the end of the string"
  // Uses mb_substr when the mbstring extension is loaded, substr otherwise.
  function my_substr($s, $start, $length="-1") {
      // Resolve the sentinel into an explicit length first.
      $span = ($length == "-1") ? my_strlen($s) - $start : $length;

      return function_exists('mb_substr')
          ? mb_substr($s, $start, $span)
          : substr($s, $start, $span);
  }
  // Length of $s: counted with mb_strlen when the mbstring extension is
  // loaded, otherwise with the byte-oriented strlen.
  function my_strlen($s) {
      return function_exists('mb_strlen') ? mb_strlen($s) : strlen($s);
  }
  // Regular-expression replace that prefers the multibyte implementation.
  //   $pattern     - the regular expression
  //   $replacement - the replacement text
  //   $string      - the subject string
  // Returns the result of mb_ereg_replace when mbstring is loaded, otherwise
  // the result of preg_replace.
  // NOTE(review): the two back ends do not share a pattern syntax --
  // mb_ereg_replace takes a bare pattern while preg_replace requires
  // delimiters -- so a given pattern only works with one of them. Callers
  // should confirm which form they pass.
  function my_reg_replace($pattern, $replacement, $string) {
      if (function_exists('mb_ereg_replace')) {
          return mb_ereg_replace($pattern, $replacement, $string);
      }
      // The fallback used to pass a misspelled, undefined variable here, which
      // replaced every match with an empty string.
      return preg_replace($pattern, $replacement, $string);
  }
  // Prepares submitted text for processing:
  //   1. caps it at $max_size bytes,
  //   2. removes backslash escapes (stripslashes),
  //   3. normalises \r\n and \r line endings to \n,
  //   4. strips HTML/PHP tags,
  //   5. runs the punctuation check, which may replace the text with an
  //      error sentence.
  // Returns the cleaned text, or the error sentence produced by checkPunct.
  function sanitize_text($text) {
      $max_size = 10000;
      if (strlen($text) > $max_size) {
          if (function_exists('mb_strcut')) {
              // mb_strcut keeps the byte limit but never cuts through the
              // middle of a multi-byte character, so the result stays valid UTF-8.
              $text = mb_strcut($text, 0, $max_size, 'UTF-8');
          } else {
              $text = substr($text, 0, $max_size);
          }
      }
      $text = stripslashes($text);
      $text = str_replace(array("\r\n", "\r", "\n"), "\n", $text);
      $text = strip_tags($text);
      $text = checkPunct($text); //only this is new -JB 7/7/14
      return $text;
  }
  48. //returns an error sentence if the ratio of punctuation characters to alphanumeric characters is >4
  // Returns $text unchanged, or a fixed error sentence when punctuation and
  // symbol characters outnumber letters and digits by more than 4 to 1.
  // Both counts are incremented by one before dividing so that empty or
  // punctuation-free text never divides by zero.
  //
  // Two defects of the earlier version are corrected here:
  //   * the punctuation pattern matched a punctuation character *followed by*
  //     a symbol character instead of either kind of character;
  //   * the ratio was inverted (alphanumeric / punctuation < 4), which together
  //     with the first defect rejected any text with fewer than three
  //     word characters.
  function checkPunct($text){
      $classes = array(
          'alpha' => '[\p{L}\p{N}]',
          'punct' => '[\p{P}\p{S}]',
      );
      $counts = array();
      foreach ($classes as $kind => $class) {
          $found = array();
          // Count in UTF-8 mode so a multi-byte character counts once.
          $n = preg_match_all('/' . $class . '/u', $text, $found);
          if ($n === false) {
              // The subject is not valid UTF-8; fall back to byte-wise matching
              // rather than treating the text as empty.
              $n = preg_match_all('/' . $class . '/', $text, $found);
          }
          $counts[$kind] = (int) $n;
      }
      if (($counts['punct'] + 1) / ($counts['alpha'] + 1) > 4.0) {
          $text = "Too much punctuation, please try again.";
      }
      return $text;
  }
  // Wraps $str in <p>...</p> and turns each run matched by the blank-line
  // pattern into a paragraph break ("</p>", the matched newlines, "<p>").
  // Empty "<p></p>" pairs are removed from the result, so an empty input
  // yields an empty string.
  //
  // The original body called my_preg_replace(), which this file does not
  // define (it only defines my_reg_replace, whose mb_ereg back end cannot take
  // this delimited pattern or the $0 back-reference). A project-level
  // my_preg_replace is still honoured if one exists; otherwise preg_replace
  // is used directly.
  function nls2p($str) {
      $pattern = '#([\r\n]\s*?[\r\n]){2,}#';
      $replacement = '</p>$0<p>';
      if (function_exists('my_preg_replace')) {
          $body = my_preg_replace($pattern, $replacement, $str);
      } else {
          $body = preg_replace($pattern, $replacement, $str);
      }
      return str_replace('<p></p>', '', '<p>' . $body . '</p>');
  }
  64. // Check if the label is going to cause problems when used inside a html attribute
  // Returns true when $label is one of the punctuation-only labels listed
  // below, false otherwise. Callers use this to substitute a safe class name.
  function is_bad_label($label) {
      $troublesome = array("#", '$', "''", "``", "'", "`", ",", ".", ":");
      foreach ($troublesome as $candidate) {
          // Loose comparison on purpose, matching how labels have always been tested.
          if ($label == $candidate) {
              return true;
          }
      }
      return false;
  }
  68. //builds an array from labels
  // Indexes spans by position: the result maps each span's start offset to an
  // array that maps its ending offset to the span object itself. A later span
  // with the same start and ending replaces an earlier one.
  function build_array($labels) {
      $byStart = array();
      foreach ($labels as $span) {
          // Assigning through both keys creates the inner array on first use.
          $byStart[$span->start][$span->ending] = $span;
      }
      return $byStart;
  }
  // Renders every span of a labeling as HTML by concatenating getSpanHTML output.
  //   $text     - the raw text the spans index into
  //   $labeling - object with a ->labels list of spans, or null
  //   $name     - id prefix; each span gets the id $name followed by (index + $offset)
  //   $newline  - when truthy, a <br/> follows every span
  //   $offset   - number added to each span index when building its id
  // Returns "" for a null labeling.
  function getLabelingHTML($text, $labeling, $name, $newline = false, $offset = 0) {
      if ($labeling === null) {
          return "";
      }
      $separator = $newline ? "<br/>" : "";
      $html = '';
      foreach ($labeling->labels as $index => $span) {
          $html .= getSpanHTML($text, $span, $name . ($index + $offset)) . $separator;
      }
      return $html;
  }
  // Builds the HTML for one labelled span of $text.
  //   $text - the raw text the span indexes into
  //   $span - object with ->start, ->ending, ->label (may be null) and
  //           ->attributes (key => value map, may be null)
  //   $name - value for the id attribute of the outer <span>
  // Output layout: an outer span.span containing an optional span.label with
  // the label text, a span.token with the covered text, and one span.attribute
  // per attribute rendered as "[key: value]".
  //
  // The label and the name are escaped before being placed inside attribute
  // values; previously they were interpolated verbatim, so a label containing
  // a quote or angle bracket could break out of the class attribute.
  function getSpanHTML($text, $span, $name) {
      $label = $span->label;
      $escapedLabel = htmlspecialchars((string) $label, ENT_QUOTES);
      // Punctuation-only labels are swapped for a fixed class name.
      $cssClass = is_bad_label($label) ? 'PUNC' : $escapedLabel;

      $result = '<span class="span" id="' . htmlspecialchars((string) $name, ENT_QUOTES) . '">';
      if (!is_null($label)) {
          $result .= '<span class="label ' . $cssClass . '">';
          $result .= htmlspecialchars($label);
          $result .= "</span>";
          $result .= ' ';
      }
      $result .= '<span class="token ' . $cssClass . '">';
      $result .= htmlspecialchars(my_substr($text, $span->start, $span->ending - $span->start));
      $result .= '</span>';
      if (!is_null($span->attributes)) {
          foreach ($span->attributes as $key => $value) {
              // Attribute spans carry the label itself as class, never PUNC.
              $result .= ' <span class="attribute ' . $escapedLabel . '">';
              $result .= htmlspecialchars("[" . $key . ": " . $value . "]");
              $result .= "</span>&nbsp;";
          }
      }
      $result .= '</span>&nbsp;';
      return $result;
  }
  // Concatenates the click-handler JavaScript (see getSpanJavascript) for every
  // span in $labeling->labels. Span number i is bound to the element whose id
  // is $name followed by (i + $offset). Returns "" for a null labeling.
  function getLabelingJavascript($labeling, $name, $offset=0) {
      if ($labeling === null) {
          return "";
      }
      $script = "";
      foreach ($labeling->labels as $index => $span) {
          $script .= getSpanJavascript($span, $name, $index + $offset);
      }
      return $script;
  }
  // Emits a jQuery click handler for the element with id $name . $i. When
  // clicked, it clears any highlight on #text and highlights the range
  // [$span->start, $span->ending] with the 'highlight' class.
  // Returns "" for a null span.
  function getSpanJavascript($span, $name, $i) {
      if ($span === null) {
          return "";
      }
      $lines = array(
          '$("#' . $name . $i . '").click( function() {',
          '$("#text").removeHighlight();',
          '$("#text").highlight(' . $span->start . ', ' . $span->ending . ", 'highlight');",
          '});',
      );
      // Every statement, including the last, is terminated by a newline.
      return implode("\n", $lines) . "\n";
  }
  // Renders every tree of $forest as HTML, one after another, each followed by
  // a <br/>. A tree's score, when set, is printed on its own line first.
  //   $text   - raw text the node spans index into
  //   $forest - object with a ->trees list, or null
  //   $name   - id prefix handed to getNodeHTML
  //   $spans  - forwarded to getNodeHTML unchanged
  // One node counter is shared across all trees. Returns "" for a null forest.
  function getForestHTML($text, $forest, $name, $spans=false) {
      if ($forest === null) {
          return "";
      }
      $html = "";
      $nodeCounter = 0;
      foreach ($forest->trees as $tree) {
          $root = $tree->nodes[$tree->top];
          if (isset($tree->score)) {
              $html .= 'Score: ' . $tree->score . '<br/>';
          }
          $html .= getNodeHTML($text, $root, $tree->nodes, 0, $name, $nodeCounter, "", true, $spans) . "<br/>";
      }
      return $html;
  }
  // Concatenates the click-handler JavaScript for every node of every tree in
  // $forest, starting from each tree's top node. One node counter is shared
  // across all trees. Returns "" for a null forest.
  function getForestJavascript($forest, $name) {
      if ($forest === null) {
          return "";
      }
      $script = "";
      $nodeCounter = 0;
      foreach ($forest->trees as $tree) {
          $root = $tree->nodes[$tree->top];
          $script .= getNodeJavascript($root, $tree->nodes, $name, $nodeCounter);
      }
      return $script;
  }
  // Recursively renders one tree node and its children as indented HTML.
  //   $text    - raw text the node spans index into
  //   $node    - object with ->label, ->span and ->children (child index => edge label)
  //   $nodes   - all nodes of the tree, indexed by the keys used in ->children
  //   $padding - number of &nbsp; to emit before this node when it is not the
  //              first item on its line
  //   $name    - id prefix; the node's id is $name followed by the counter value
  //   $counter - running node number, incremented once per node (by reference)
  //   $label   - label of the edge leading to this node ("" for the root)
  //   $first   - true when this node continues the current output line
  //   $spans   - when truthy every node prints its span; otherwise only leaves do
  // Returns the HTML fragment. Note that ksort() reorders $node->children.
  //
  // Node and edge labels are HTML-escaped here, consistent with getSpanHTML;
  // previously they were written to the page verbatim.
  function getNodeHTML($text, $node, $nodes, $padding, $name, &$counter, $label="", $first=true, $spans=false) {
      $result = "";
      $dpadding = 0;
      $counter += 1;
      if (!$first) {
          $result .= str_repeat("&nbsp;", max(0, (int) $padding));
      }
      if (!($label == "")) {
          $result .= "<span class='edge'>&lt;" . htmlspecialchars($label) . "&gt;</span>&nbsp;";
          // Width is measured on the visible label plus the two angle brackets.
          $dpadding += my_strlen($label) + 2;
      }
      $span = $node->span;
      if (!($node->label == "dependency node")) {
          $result .= '<span class="label" id="' . htmlspecialchars($name . $counter, ENT_QUOTES) . '">'
              . htmlspecialchars($node->label) . '</span>&nbsp;&nbsp;';
          $dpadding += my_strlen($node->label) + 2;
      }
      if ($spans) {
          $result .= getSpanHTML($text, $span, $name.$counter);
          $result .= '<br/>';
          //$dpadding += $span->ending-$span->start+1;
          $dpadding++;
      }
      if (!empty($node->children)) {
          ksort($node->children);
          $padding += $dpadding;
          // With spans on, the parent's line already ended in <br/>, so even
          // the first child starts a new, padded line.
          $first = !$spans;
          foreach ($node->children as $index => $nlabel) {
              $result .= getNodeHTML($text, $nodes[$index], $nodes, $padding, $name, $counter, $nlabel, $first, $spans);
              $first = false;
          }
      } else if (!$spans) {
          // Leaf without per-node spans: print the covered text here.
          $result .= getSpanHTML($text, $span, "");
          $result .= '<br/>';
      }
      return $result;
  }
  // Emits the click-handler JavaScript for $node and, recursively, for all of
  // its children in ascending child-index order. $counter is the running node
  // number (by reference) and is incremented once per visited node. Note that
  // ksort() reorders $node->children.
  function getNodeJavascript($node, $nodes, $name, &$counter) {
      $counter++;
      $script = getSpanJavascript($node->span, $name, $counter);
      if (is_null($node->children) || empty($node->children)) {
          return $script;
      }
      ksort($node->children);
      foreach ($node->children as $childIndex => $edgeLabel) {
          $script .= getNodeJavascript($nodes[$childIndex], $nodes, $name, $counter);
      }
      return $script;
  }
  // Renders $record->rawText with wikifier annotations applied.
  //   $record - object with ->rawText
  //   $wikid  - object with a ->labels list of spans; each span has ->start,
  //             ->ending, ->label and ->attributes ("IsLinked",
  //             "TitleWikiCatAttribs")
  // Spans whose IsLinked attribute equals "true" become
  // <a class="wiki" href=LABEL cat=CATEGORIES>; spans labelled "UNMAPPED"
  // become <b>. Newlines in the result are converted with nl2br().
  //
  // Changes from the earlier version: the href and cat values are escaped for
  // use inside a double-quoted attribute, missing attribute keys no longer
  // raise notices, and the running offset is measured from the markup actually
  // inserted instead of from a hand-maintained template string.
  // NOTE(review): the raw text itself is still inserted without HTML escaping;
  // confirm that callers sanitise it beforehand.
  function wikify2($record, $wikid){
      // Build the entry table first: span start => entry, keeping the longest
      // span per start, and at most one span per ending (the earliest start).
      $wiki_entries = array();
      $wiki_entries_end = array();
      foreach ($wikid->labels as $id => $span) {
          $known = array_key_exists($span->start, $wiki_entries);
          if (!$known || $span->ending > $wiki_entries[$span->start]["end"]) {
              $attributes = $span->attributes;
              $wiki_entries[$span->start] = array(
                  'label' => $span->label,
                  'is_linked' => isset($attributes["IsLinked"]) ? $attributes["IsLinked"] : null,
                  'cat_attribs' => isset($attributes["TitleWikiCatAttribs"]) ? $attributes["TitleWikiCatAttribs"] : null,
                  'end' => $span->ending,
                  'text' => my_substr($record->rawText, $span->start, $span->ending - $span->start),
              );
              if (array_key_exists($span->ending, $wiki_entries_end) && $span->start < $wiki_entries_end[$span->ending]) {
                  // replace the wiki entry starting at a later point, but ending at the same point with this one
                  unset($wiki_entries[$wiki_entries_end[$span->ending]]);
                  $wiki_entries_end[$span->ending] = $span->start;
              } else if (!array_key_exists($span->ending, $wiki_entries_end)) {
                  $wiki_entries_end[$span->ending] = $span->start;
              } else {
                  // Another entry with this ending starts at or before this one.
                  unset($wiki_entries[$span->start]);
              }
          }
      }
      // Now walk the entries in text order and splice the markup in.
      ksort($wiki_entries);
      $result = mb_convert_encoding($record->rawText, 'UTF-8');
      $offset = 0; // characters of markup inserted so far
      $lastStart = 0;
      $lastEnd = 0;
      foreach ($wiki_entries as $start_pos => $entry) {
          // If we have an entry that is completely within the last entry, skip it. Prevents link within a link problems
          if ($start_pos > $lastStart && $entry["end"] < $lastEnd) {
              continue;
          }
          $lastStart = $start_pos;
          $lastEnd = $entry["end"];
          // Translate original-text positions into positions in $result.
          $from = $start_pos + $offset;
          $to = $entry["end"] + $offset;
          if ($entry["is_linked"] == "true") {
              $open = '<a class="wiki" href="' . htmlspecialchars((string) $entry["label"], ENT_QUOTES)
                  . '" cat="' . htmlspecialchars((string) $entry["cat_attribs"], ENT_QUOTES) . '">';
              $close = '</a>';
          } else if ($entry["label"] == "UNMAPPED") {
              $open = '<b>';
              $close = '</b>';
          } else {
              continue;
          }
          $result = my_substr($result, 0, $from) . $open . my_substr($result, $from, $to - $from) . $close . my_substr($result, $to);
          $offset += my_strlen($open) + my_strlen($close);
      }
      return nl2br($result);
  }
  261. //this is used in the demos
  // Renders a record's text with one labeling applied, grouped into
  // <span class="sentence"> elements inside <div class="output"><p>.
  //   $record   - object with ->rawText and ->labelViews["sentences"]->labels
  //               (sentence spans with ->ending)
  //   $labeling - object with a ->labels list of spans
  //   $demoName - "wikifier" hands the whole job to wikify2()
  // Text between labelled spans is HTML-escaped and copied through; each
  // labelled span is rendered by getSpanHTML. Newlines are converted with nl2br().
  //
  // Changes from the earlier version: the sentence list is never read past its
  // end, and every sentence boundary lying before a label is emitted (a
  // sentence without any labels used to shift all later boundaries by one).
  function getHTMLForLabels($record, $labeling,$demoName=null) {
      if ($demoName == "wikifier") {
          return wikify2($record, $labeling);
      }
      $labels = build_array($labeling->labels);
      $rawtext = $record->rawText;
      $sents = $record->labelViews["sentences"]->labels;
      ksort($labels);
      $previous = 0; // offset up to which $rawtext has been emitted
      $j = 0;        // index of the sentence currently open
      $result = '<div class="output"><p><span class="sentence">';
      foreach ($labels as $start => $ends) {
          // Close every sentence that ends before this label starts.
          while (isset($sents[$j]) && $start > $sents[$j]->ending) {
              $sentenceEnd = $sents[$j]->ending;
              if ($previous < $sentenceEnd) {
                  $result .= htmlspecialchars(my_substr($rawtext, $previous, $sentenceEnd - $previous));
                  $previous = $sentenceEnd;
              }
              $result .= '</span><span class="sentence">';
              $j = $j + 1;
          }
          ksort($ends);
          // Unlabelled text between the previous span and this one.
          if ($start - $previous > 0) {
              $result .= htmlspecialchars(my_substr($rawtext, $previous, $start - $previous));
          }
          foreach ($ends as $end => $span) {
              $result .= getSpanHTML($rawtext, $span, "");
              $previous = $end;
          }
      }
      // Whatever follows the last labelled span.
      $result .= htmlspecialchars(my_substr($rawtext, $previous));
      $result .= '</span></p></div>';
      return nl2br($result);
  }
  // Runs ../demo_functions/rolepredict.sh for $verb and parses its output.
  // Every output line of the form "<name><whitespace><description>" adds one
  // entry name => description; other lines are ignored.
  // Returns the collected array, which is empty when the script produces no
  // usable output or the process cannot be started.
  //
  // $verb is passed through escapeshellarg() so that it reaches the script as
  // a single argument and cannot inject further shell commands.
  // (An earlier script location was ../bin/role-predict.sh.)
  function predict_roles($verb) {
      $roles = array();
      $h = popen("../demo_functions/rolepredict.sh " . escapeshellarg($verb), "r");
      if ($h === false) {
          // Without this check the read loop below would never terminate.
          return $roles;
      }
      while (($line = fgets($h)) !== false) {
          if (preg_match("/(\S+)\s+(.+)/", $line, $m) === 1) {
              $roles[$m[1]] = $m[2];
          }
      }
      pclose($h);
      return $roles;
  }
  310. ?>