PageRenderTime 58ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/ems/web/analysis_diff.php

https://bitbucket.org/ufal/mosesdecoder
PHP | 816 lines | 705 code | 81 blank | 30 comment | 164 complexity | 1e11e0e5e548980f801bcbb4fcfbf0b5 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-3.0, LGPL-3.0
  1. <?php
  /**
   * Render the comparative-analysis page for two runs of one setup.
   *
   * Prints the heading, the JavaScript helpers (diff, ngram_diff,
   * generic_show_diff) that load sub-reports through Ajax.Updater, and the
   * placeholder <div>s those helpers fill in.
   *
   * Reads the globals $task, $user, $setup, $id, $id2, $set and $comment;
   * the run labels are taken from $comment["$setup-<run>"]->name.
   */
  function diff_analysis() {
      global $task,$user,$setup,$id,$id2,$set;
      global $comment,$dir;

      head("Comparative Analysis: $task ($user), Set $set");

      $c = $comment["$setup-$id"]->name;
      $c2 = $comment["$setup-$id2"]->name;
      // When the second label starts with "<id>+", print only what follows the id.
      if (substr($c2,0,strlen($id)+1) == $id."+") {
          print "Run $id2 vs $id (".substr($c2,strlen($id)).")";
      }
      else {
          print "Run $id2 ($c2) vs $id ($c)";
      }
      // NOTE(review): the matching <h4> is presumably opened by head() -- confirm.
      print "</h4>";

      // Query-string fragment shared by every Ajax URL below. The values are
      // URL-encoded so that characters such as &, #, + or a quote cannot break
      // the URL or the surrounding single-quoted JavaScript literal.
      $run_query = '&setup=' . rawurlencode($setup)
                 . '&id=' . rawurlencode($id)
                 . '&id2=' . rawurlencode($id2)
                 . '&set=' . rawurlencode($set);
  ?><script language="javascript" src="/javascripts/prototype.js"></script>
  <script language="javascript" src="/javascripts/scriptaculous.js"></script>
  <script>
  function diff(field,sort,count) {
    var url = '?analysis_diff=' + field + '_diff'
      + '<?php print $run_query ?>'
      + '&sort=' + sort
      + '&count=' + count;
    new Ajax.Updater(field, url, { method: 'get' });
  }
  function ngram_diff(type,order,count,sort,smooth) {
    var url = '?analysis_diff=ngram_' + type + '_diff'
      + '<?php print $run_query ?>'
      + '&order=' + order
      + '&smooth=' + smooth
      + '&sort=' + sort
      + '&count=' + count;
    var field = (type == "precision" ? "nGramPrecision" : "nGramRecall") + order;
    new Ajax.Updater(field, url, { method: 'get' });
  }
  function generic_show_diff(field,parameters) {
    var url = '?analysis=' + field + '_show'
      + '<?php print $run_query ?>'
      + '&' + parameters;
    new Ajax.Updater(field, url, { method: 'get', evalScripts: true });
  }
  </script>
  </head>
  <body>
  <div id="nGramSummary"><?php ngram_summary_diff() ?></div>
  <div id="PrecisionByCoverageDiff"></div>
  <div id="PrecisionRecallDetailsDiff"></div>
  <div id="bleu">(loading...)</div>
  <script>
  diff('bleu','',5);
  </script>
  </body></html>
  <?php
  }
  /**
   * Print the "Precision by Coverage" comparison of the two runs.
   *
   * Reads the tab-separated "precision-by-corpus-coverage" file of the
   * compared run (get_current_analysis_filename2) and of the baseline run
   * (get_current_analysis_filename), groups the rows by floor(log2(count))
   * of the first column (-1 for a count of 0), and hands the per-bucket
   * differences to precision_by_coverage_diff_graph(). Afterwards every
   * factored variant "precision-by-corpus-coverage.<factor>" that exists for
   * both runs is rendered by precision_by_coverage_diff_factored().
   *
   * Columns used per row: 0 = count, 1 = precision, 2 = delete, 3 = length,
   * 4 = total.
   */
  function precision_by_coverage_diff() {
      global $experiment,$evalset,$dir,$set,$id,$id2;
      $img_width = 1000;
      $empty_bucket = array("precision" => 0, "delete" => 0, "length" => 0, "total" => 0);

      print "<h3>Precision by Coverage</h3>";
      print "The graphs display what ratio of words of a specific type are translated correctly (yellow), and what ratio is deleted (blue).";
      print " The extend of the boxes is scaled on the x-axis by the number of tokens of the displayed type.";

      // load data
      $data = file(get_current_analysis_filename2("precision","precision-by-corpus-coverage"));
      $total = 0;
      $log_info = array();
      for($i=0;$i<count($data);$i++) {
          // split() no longer exists in PHP 7+; the delimiter is a literal tab.
          // The line ending is stripped so the last field is a clean number.
          $item = explode("\t",rtrim($data[$i],"\r\n"));
          if (count($item) < 5) {
              continue; // blank or truncated line
          }
          $total += $item[4];
          $log_count = ($item[0]>0) ? (int) (log($item[0])/log(2)) : -1;
          if (!array_key_exists($log_count,$log_info)) {
              $log_info[$log_count] = $empty_bucket;
          }
          $log_info[$log_count]["precision"] += $item[1];
          $log_info[$log_count]["delete"] += $item[2];
          $log_info[$log_count]["length"] += $item[3];
          $log_info[$log_count]["total"] += $item[4];
      }
      // Snapshot of the compared run before the baseline is subtracted.
      $log_info_new = $log_info;

      // load base data
      $data = file(get_current_analysis_filename("precision","precision-by-corpus-coverage"));
      for($i=0;$i<count($data);$i++) {
          $item = explode("\t",rtrim($data[$i],"\r\n"));
          if (count($item) < 4) {
              continue;
          }
          $log_count = ($item[0]>0) ? (int) (log($item[0])/log(2)) : -1;
          // A bucket that exists only in the baseline starts from zero in both
          // tables instead of being created implicitly by "-=" without a total.
          if (!array_key_exists($log_count,$log_info)) {
              $log_info[$log_count] = $empty_bucket;
              $log_info_new[$log_count] = $empty_bucket;
          }
          $log_info[$log_count]["precision"] -= $item[1];
          $log_info[$log_count]["delete"] -= $item[2];
          $log_info[$log_count]["length"] -= $item[3];
      }

      print "<h4>By log<sub>2</sub>-count in the training corpus</h4>";
      precision_by_coverage_diff_graph("byCoverage",$log_info,$log_info_new,$total,$img_width,SORT_NUMERIC);

      // load factored data
      $d = dir("$dir/evaluation/$set.analysis.".get_precision_analysis_version($dir,$set,$id));
      if ($d !== false && $d !== null) {
          while (false !== ($file = $d->read())) {
              // Anchored and with the dot escaped: the factored renderer is
              // handed $file itself, so $file has to be exactly the name whose
              // existence is checked for the compared run.
              if (preg_match('/^precision-by-corpus-coverage\.(.+)$/',$file, $match) &&
                  file_exists(get_current_analysis_filename2("precision","precision-by-corpus-coverage.$match[1]"))) {
                  precision_by_coverage_diff_factored($img_width,$total,$file,$match[1]);
              }
          }
          $d->close();
      }
  }
  /**
   * Print the precision-by-coverage comparison broken down by one input factor.
   *
   * @param mixed  $img_width  pixel width used for the overall graph
   * @param mixed  $total      token total of the unfactored data; used to scale
   *                           the per-factor graphs and to hide factors that
   *                           cover 1% of it or less
   * @param string $file       name of the factored analysis file, passed to
   *                           get_current_analysis_filename2() (compared run)
   *                           and get_current_analysis_filename() (baseline)
   * @param mixed  $factor_id  factor identifier handed to factor_name()
   *
   * Columns used per row: 0 = factor value, 1 = count, 2 = precision,
   * 3 = delete, 4 = length, 5 = total.
   */
  function precision_by_coverage_diff_factored($img_width,$total,$file,$factor_id) {
      global $dir,$set,$id,$id2;
      $zero = array("precision" => 0, "delete" => 0, "length" => 0, "total" => 0);
      $info_factored_sum = array();   // factor => summed counters
      $log_info_factored = array();   // factor => log2 bucket => summed counters
      $total_factored = array();      // factor => token total of the compared run

      $data = file(get_current_analysis_filename2("precision",$file));
      for($i=0;$i<count($data);$i++) {
          // split() no longer exists in PHP 7+; the delimiter is a literal tab.
          $item = explode("\t",rtrim($data[$i],"\r\n"));
          if (count($item) < 6) {
              continue; // blank or truncated line
          }
          $factor = $item[0];
          $count = $item[1];
          $log_count = ($count>0) ? (int) (log($count)/log(2)) : -1;

          // Create the accumulators explicitly instead of "+=" on missing keys.
          if (!isset($info_factored_sum[$factor])) {
              $info_factored_sum[$factor] = $zero;
              $total_factored[$factor] = 0;
          }
          if (!isset($log_info_factored[$factor][$log_count])) {
              $log_info_factored[$factor][$log_count] = $zero;
          }

          $info_factored_sum[$factor]["precision"] += $item[2];
          $info_factored_sum[$factor]["delete"] += $item[3];
          $info_factored_sum[$factor]["length"] += $item[4];
          $info_factored_sum[$factor]["total"] += $item[5];
          $total_factored[$factor] += $item[5];
          $log_info_factored[$factor][$log_count]["precision"] += $item[2];
          $log_info_factored[$factor][$log_count]["delete"] += $item[3];
          $log_info_factored[$factor][$log_count]["length"] += $item[4];
          $log_info_factored[$factor][$log_count]["total"] += $item[5];
      }
      // Snapshots of the compared run before the baseline is subtracted.
      $info_factored_sum_new = $info_factored_sum;
      $log_info_factored_new = $log_info_factored;

      // baseline data
      $data = file(get_current_analysis_filename("precision",$file));
      for($i=0;$i<count($data);$i++) {
          $item = explode("\t",rtrim($data[$i],"\r\n"));
          if (count($item) < 5) {
              continue;
          }
          $factor = $item[0];
          $count = $item[1];
          $log_count = ($count>0) ? (int) (log($count)/log(2)) : -1;

          // Factors or buckets seen only in the baseline start from zero in
          // both the difference table and the snapshot.
          if (!isset($info_factored_sum[$factor])) {
              $info_factored_sum[$factor] = $zero;
              $info_factored_sum_new[$factor] = $zero;
              $total_factored[$factor] = 0;
          }
          if (!isset($log_info_factored[$factor][$log_count])) {
              $log_info_factored[$factor][$log_count] = $zero;
              $log_info_factored_new[$factor][$log_count] = $zero;
          }

          $info_factored_sum[$factor]["precision"] -= $item[2];
          $info_factored_sum[$factor]["delete"] -= $item[3];
          $info_factored_sum[$factor]["length"] -= $item[4];
          $log_info_factored[$factor][$log_count]["precision"] -= $item[2];
          $log_info_factored[$factor][$log_count]["delete"] -= $item[3];
          $log_info_factored[$factor][$log_count]["length"] -= $item[4];
      }

      print "<h4>By factor ".factor_name("input",$factor_id)."</h4>";
      precision_by_coverage_diff_graph("byFactor",$info_factored_sum,$info_factored_sum_new,$total,$img_width,SORT_STRING);

      print "<h4>For each factor, by log<sub>2</sub>-count in the corpus</h4>";
      foreach ($log_info_factored as $factor => $info) {
          // Only factors covering more than 1% of all tokens get their own
          // graph; an empty overall total shows none instead of dividing by 0.
          if ($total != 0 && $total_factored[$factor]/$total > 0.01) {
              print "<table style=\"display:inline;\"><tr><td align=center><font size=-2><b>$factor</b></font>";
              precision_by_coverage_diff_graph("byCoverageFactor$factor",$info,$log_info_factored_new[$factor],$total,10+2*$img_width*$total_factored[$factor]/$total,SORT_NUMERIC);
              print "</td></tr></table>";
          }
      }
  }
  /**
   * Print a per-word table comparing precision, deletion and length
   * statistics of the two runs.
   *
   * @param string $type  "byCoverage" (keep rows whose log2 count bucket
   *                      equals $_GET["type"]), "byFactor" (keep rows whose
   *                      factor equals $_GET["type"]), or
   *                      "byCoverageFactor<factor>" (both filters; the factor
   *                      is taken from the suffix of $type). Any other value
   *                      applies no filter.
   *
   * Columns used per row of "precision-by-input-word": 0 = precision,
   * 1 = delete, 2 = length, 3 = total, 4 = count, 5 = word, 6 = factor.
   */
  function precision_by_word_diff($type) {
      global $dir,$set,$id,$id2;
      $zero = array("precision" => 0, "delete" => 0, "length" => 0, "total" => 0);

      $byCoverage = -2;      // -2 = no coverage filter (-1 is the bucket for count 0)
      $byFactor = "false";   // "false" = no factor filter
      $requested = isset($_GET["type"]) ? $_GET["type"] : "";
      if ($type == "byCoverage") {
          $byCoverage = (int) $requested;
      }
      else if ($type == "byFactor") {
          $byFactor = $requested;
      }
      else if (preg_match("/byCoverageFactor(.+)/",$type,$match)) {
          $byCoverage = (int) $requested;
          $byFactor = $match[1];
      }

      // compared run
      $data = file(get_current_analysis_filename2("precision","precision-by-input-word"));
      $total = 0;
      $info = array();
      for($i=0;$i<count($data);$i++) {
          // split() no longer exists in PHP 7+; the delimiter is a literal tab.
          $item = explode("\t",rtrim($data[$i]));
          if (count($item) < 6) {
              continue; // blank or truncated line
          }
          // The overall total counts every row, including filtered-out ones.
          $total += $item[3];

          // filter for count
          $count = $item[4];
          $log_count = ($count>0) ? (int) (log($count)/log(2)) : -1;
          if ($byCoverage != -2 && $byCoverage != $log_count) {
              continue;
          }
          // filter for factor (compared as strings so that numeric-looking
          // factor values are not conflated)
          $word = $item[5];
          $factor = isset($item[6]) ? $item[6] : "";
          if ($byFactor !== "false" && (string) $byFactor !== $factor) {
              continue;
          }

          if (!array_key_exists($word,$info)) {
              $info[$word] = $zero;
          }
          $info[$word]["precision"] += $item[0];
          $info[$word]["delete"] += $item[1];
          $info[$word]["length"] += $item[2];
          $info[$word]["total"] += $item[3];
      }
      // Snapshot of the compared run before the baseline is subtracted.
      $info_new = $info;

      // baseline run
      $baseline_only = array();
      $data = file(get_current_analysis_filename("precision","precision-by-input-word"));
      for($i=0;$i<count($data);$i++) {
          $item = explode("\t",rtrim($data[$i]));
          if (count($item) < 6) {
              continue;
          }
          // filter for count
          $count = $item[4];
          $log_count = ($count>0) ? (int) (log($count)/log(2)) : -1;
          if ($byCoverage != -2 && $byCoverage != $log_count) {
              continue;
          }
          // filter for factor
          $word = $item[5];
          $factor = isset($item[6]) ? $item[6] : "";
          if ($byFactor !== "false" && (string) $byFactor !== $factor) {
              continue;
          }

          if (!array_key_exists($word,$info)) {
              $info[$word] = $zero;
              $info_new[$word] = $zero;
              $baseline_only[$word] = true;
          }
          // Words absent from the compared run carry the negated baseline
          // total. It is accumulated over all of the word's rows, like the
          // other counters; previously only the first row was counted.
          if (isset($baseline_only[$word])) {
              $info[$word]["total"] -= $item[3];
          }
          $info[$word]["precision"] -= $item[0];
          $info[$word]["delete"] -= $item[1];
          $info[$word]["length"] -= $item[2];
      }

      print "<table border=1><tr><td align=center>&nbsp;</td><td align=center colspan=3>Precision</td><td align=center colspan=2>Precision Impact</td><td align=center colspan=3>Delete</td><td align=center colspan=2>Delete Impact</td><td align=center>Length</td></tr>\n";
      foreach ($info as $word => $wordinfo) {
          $word_total = $wordinfo["total"];
          // Ratios fall back to 0 when the denominator is 0.
          $has_word_total = ($word_total != 0);
          $has_total = ($total != 0);
          $prec_new_pct = $has_word_total ? $info_new[$word]["precision"]/$word_total*100 : 0;
          $prec_diff_pct = $has_word_total ? $wordinfo["precision"]/$word_total*100 : 0;
          $del_new_pct = $has_word_total ? $info_new[$word]["delete"]/$word_total*100 : 0;
          $del_diff_pct = $has_word_total ? $wordinfo["delete"]/$word_total*100 : 0;
          $length_ratio = $has_word_total ? $wordinfo["length"]/$word_total : 0;
          $prec_impact = $has_total ? $wordinfo["precision"]/$total*100 : 0;
          $del_impact = $has_total ? $wordinfo["delete"]/$total*100 : 0;

          // NOTE(review): $word is printed unescaped, as before -- confirm the
          // analysis files already contain HTML-safe tokens.
          print "<tr><td align=center>$word</td>";
          printf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",$prec_new_pct,"%",$prec_diff_pct,"%",$wordinfo["precision"],$word_total);
          printf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",$prec_impact,"%",$wordinfo["precision"],$total);
          printf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+d/%d</font></td>",$del_new_pct,"%",$del_diff_pct,"%",$wordinfo["delete"],$word_total);
          printf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+d/%d</font></td>",$del_impact,"%",$wordinfo["delete"],$total);
          printf("<td align=right>%+.3f</td>",$length_ratio);
          print "</tr>";
      }
      print "</table>\n";
  }
  /**
   * Print one comparison widget: a hidden statistics table plus a <canvas>
   * bar chart drawn by inline JavaScript.
   *
   * @param string $name          suffix used for the HTML element ids
   * @param array  $log_info      key => array(precision, delete, length, total);
   *                              printed as signed differences
   * @param array  $log_info_new  same keys; printed as the absolute values
   * @param mixed  $total         denominator of the "impact" columns
   * @param mixed  $img_width     canvas width in pixels
   * @param int    $sort_type     sort() flag used to order the keys
   *
   * Every ratio whose denominator is 0 is reported as 0.
   */
  function precision_by_coverage_diff_graph($name,$log_info,$log_info_new,$total,$img_width,$sort_type) {
      $keys = array_keys($log_info);
      sort($keys,$sort_type);

      // --- table (hidden until the graph is clicked) ---
      print "<div id=\"Toggle$name\" onClick=\"document.getElementById('Table$name').style.display = 'none'; this.style.display = 'none';\" style=\"display:none;\"><font size=-2>(hide table)</font></div>\n";
      print "<div id=\"Table$name\" style=\"display:none;\">\n";
      print "<table border=1><tr><td align=center>&nbsp;</td><td align=center colspan=3>Precision</td><td align=center colspan=2>Precision Impact</td><td align=center colspan=3>Delete</td><td align=center colspan=2>Delete Impact</td><td align=center>Length</td></tr>\n";
      foreach ($keys as $i) {
          $row = $log_info[$i];
          $row_total = $row["total"];
          $new_precision = isset($log_info_new[$i]["precision"]) ? $log_info_new[$i]["precision"] : 0;
          $new_delete = isset($log_info_new[$i]["delete"]) ? $log_info_new[$i]["delete"] : 0;

          $has_row_total = ($row_total != 0);
          $has_total = ($total != 0);
          $prec_new_pct = $has_row_total ? $new_precision/$row_total*100 : 0;
          $prec_diff_pct = $has_row_total ? $row["precision"]/$row_total*100 : 0;
          $del_new_pct = $has_row_total ? $new_delete/$row_total*100 : 0;
          $del_diff_pct = $has_row_total ? $row["delete"]/$row_total*100 : 0;
          $length_ratio = $has_row_total ? $row["length"]/$row_total : 0;
          $prec_impact = $has_total ? $row["precision"]/$total*100 : 0;
          $del_impact = $has_total ? $row["delete"]/$total*100 : 0;

          print "<tr><td align=center>$i</td>";
          // The second column is a difference, so it carries an explicit sign
          // like the corresponding delete column.
          printf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",$prec_new_pct,"%",$prec_diff_pct,"%",$row["precision"],$row_total);
          printf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",$prec_impact,"%",$row["precision"],$total);
          printf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+d/%d</font></td>",$del_new_pct,"%",$del_diff_pct,"%",$row["delete"],$row_total);
          printf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+d/%d</font></td>",$del_impact,"%",$row["delete"],$total);
          printf("<td align=right>%+.3f</td>",$length_ratio);
          print "<td><A HREF=\"javascript:generic_show_diff('PrecisionByWordDiff$name','type=$i')\">&#x24BE;</A></td>";
          print "</tr>";
      }
      print "</table><div id=\"PrecisionByWordDiff$name\"></div></div>";

      // --- graph ---
      print "<div id=\"Graph$name\" onClick=\"document.getElementById('Table$name').style.display = 'block'; document.getElementById('Toggle$name').style.display = 'block';\"><canvas id=\"$name\" width=$img_width height=300></canvas></div>";
      print "<script language=\"javascript\">\n"
          . "var canvas = document.getElementById(\"$name\");\n"
          . "var ctx = canvas.getContext(\"2d\");\n"
          . "ctx.lineWidth = 0.5;\n"
          . "ctx.font = '9px serif';\n";

      // Grid lines and labels of the precision area (upper part of the canvas).
      // The factor 1.001 keeps the %d truncation from dropping a unit when the
      // accumulated float is marginally below the intended value.
      for($line=-1;$line<=0.8;$line+=.2) {
          $height = 90-$line/2*180;
          print "ctx.moveTo(20, $height);\n";
          print "ctx.lineTo($img_width, $height);\n";
          print "ctx.fillText(\"".sprintf("%d",10 * $line * 1.001)."\%\", 0, $height+4);";
      }
      // Grid lines and labels of the delete area (lower part of the canvas).
      for($line=-0.4;$line<=0.4;$line+=.2) {
          $height = 250+$line/2*180;
          print "ctx.moveTo(20, $height);\n";
          print "ctx.lineTo($img_width, $height);\n";
          if ($line != 0) {
              print "ctx.fillText(\"".sprintf("%d",10 * $line * 1.001)."\%\", 0, $height+4);";
          }
      }
      print "ctx.strokeStyle = \"rgb(100,100,100)\"; ctx.stroke();\n";

      // Bars: the horizontal extent is proportional to each key's share of the
      // summed totals.
      $grand_total = 0;
      foreach ($keys as $i) {
          $grand_total += $log_info[$i]["total"];
      }
      $total_so_far = 0;
      foreach ($keys as $i) {
          $bucket_total = $log_info[$i]["total"];
          $prec_ratio = ($bucket_total != 0) ? $log_info[$i]["precision"]/$bucket_total : 0;
          $del_ratio = ($bucket_total != 0) ? $log_info[$i]["delete"]/$bucket_total : 0;
          if ($grand_total != 0) {
              $x = (int)(20+($img_width-20) * $total_so_far / $grand_total);
              $width = (int)($img_width * $bucket_total/$grand_total);
          }
          else {
              $x = 20;
              $width = 0;
          }

          $y = (int)(90-($prec_ratio*180*5));
          $height = (int)($prec_ratio*180*5);
          print "ctx.fillStyle = \"rgb(200,200,0)\";";
          print "ctx.fillRect ($x, $y, $width, $height);";

          $height = (int)($del_ratio*180*5);
          print "ctx.fillStyle = \"rgb(100,100,255)\";";
          print "ctx.fillRect ($x, 250, $width, $height);";

          $total_so_far += $bucket_total;
          // Label only bars that are wide enough to show one.
          if ($width>3) {
              print "ctx.fillStyle = \"rgb(0,0,0)\";";
              print "ctx.fillText(\"$i\", $x+$width/2-3, 190);";
          }
      }
      print "</script>";
  }
  // stats on precision and recall
  /**
   * Print the 2x4 grid of placeholders for the n-gram precision (first row)
   * and recall (second row) reports of orders 1 to 4, followed by the script
   * that requests each report through the JavaScript ngram_diff() helper with
   * a count of 5, an empty sort and smoothing 0.
   */
  function precision_recall_details_diff() {
      // report type => prefix of the placeholder <div> ids
      $div_prefix = array("precision" => "nGramPrecision", "recall" => "nGramRecall");

      $table_rows = array();
      $script_calls = "";
      foreach ($div_prefix as $kind => $prefix) {
          $cells = "";
          for ($order = 1; $order <= 4; $order++) {
              $cells .= "<td width=25% valign=top><div id=\"" . $prefix . $order . "\">(loading...)</div></td>\n";
              $script_calls .= "ngram_diff('" . $kind . "'," . $order . ",5,'',0);\n";
          }
          $table_rows[] = $cells;
      }

      print "<table width=100%>\n<tr>\n" . implode("</tr><tr>\n", $table_rows) . "</tr></table>\n";
      print "<script language=\"javascript\">\n" . $script_calls . "</script>\n";
  }
  /**
   * Print the summary box at the top of the comparison page: n-gram precision
   * counts of the compared run with differences to the baseline, links to the
   * detail views, the metric scores of both runs and the length difference.
   *
   * Reads the "basic"/"summary" analysis file of run $id (index 0) and run
   * $id2 (index 1); each line is expected to look like "<key>: <value>".
   * Metric scores are parsed from $experiment[<run>]->result[$set].
   */
  function ngram_summary_diff() {
      global $experiment,$evalset,$dir,$set,$id,$id2;

      // load data
      $info = array(array(), array());
      for($idx=0;$idx<2;$idx++) {
          $data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","summary"));
          for($i=0;$i<count($data);$i++) {
              // split() no longer exists in PHP 7+; ": " is a literal delimiter.
              $item = explode(": ",rtrim($data[$i],"\r\n"));
              if (count($item) >= 2) {
                  $info[$idx][$item[0]] = $item[1];
              }
          }
      }

      print "<table cellspacing=5 width=100%><tr><td valign=top align=center bgcolor=#eeeeee>";
      print "<b>Precision of Output</b><br>";

      // One cell per n-gram order: value of the compared run, then the signed
      // difference to the baseline in parentheses.
      $type = "precision";
      $correct_cells = "";
      $ratio_cells = "";
      $wrong_cells = "";
      for($n=1;$n<=4;$n++) {
          $correct = array();
          $wrong = array();
          $ratio = array();
          for($idx=0;$idx<2;$idx++) {
              $c = isset($info[$idx]["$type-$n-correct"]) ? $info[$idx]["$type-$n-correct"] : 0;
              $t = isset($info[$idx]["$type-$n-total"]) ? $info[$idx]["$type-$n-total"] : 0;
              $correct[$idx] = $c;
              $wrong[$idx] = $t - $c;
              // A missing or empty summary yields 0% instead of dividing by 0.
              $ratio[$idx] = ($t != 0) ? $c/$t*100 : 0;
          }
          $correct_cells .= sprintf("<td>%d (%+d)</td>",$correct[1],$correct[1]-$correct[0]);
          $ratio_cells .= sprintf("<td>%.1f%s (%+.1f%s)</td>",$ratio[1],'%',$ratio[1]-$ratio[0],'%');
          $wrong_cells .= sprintf("<td>%d (%+d)</td>",$wrong[1],$wrong[1]-$wrong[0]);
      }
      print "<table><tr><td>$type</td><td>1-gram</td><td>2-gram</td><td>3-gram</td><td>4-gram</td></tr>\n";
      print "<tr><td>correct</td>".$correct_cells."</tr>\n";
      print "<tr><td>&nbsp;</td>".$ratio_cells."</tr>\n";
      print "<tr><td>wrong</td>".$wrong_cells."</tr>\n";
      print "</table>";

      print "<A HREF=\"javascript:generic_show_diff('PrecisionRecallDetailsDiff','')\">details</A> ";
      // The coverage view needs the coverage file of both runs.
      if (file_exists(get_current_analysis_filename("precision","precision-by-corpus-coverage")) &&
          file_exists(get_current_analysis_filename2("precision","precision-by-corpus-coverage"))) {
          print "| <A HREF=\"javascript:generic_show_diff('PrecisionByCoverageDiff','')\">precision of input by coverage</A> ";
      }

      print "</td><td valign=top align=center bgcolor=#eeeeee>";
      print "<b>Metrics</b><br>\n";
      // $score[<metric name>][<run index>] = score text
      $score = array();
      for($idx=0;$idx<2;$idx++) {
          $each_score = explode(" ; ",$experiment[$idx?$id2:$id]->result[$set]);
          for($i=0;$i<count($each_score);$i++) {
              if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) ||
                  preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) {
                  $score[$match[2]][$idx] = $match[1];
              }
          }
      }
      $header = ""; $score_line = ""; $diff_line = "";
      foreach ($score as $name => $value) {
          // A metric may have been reported for only one of the two runs.
          $new_score = isset($value[1]) ? $value[1] : "";
          $old_score = isset($value[0]) ? $value[0] : "";
          $header .= "<td>$name</td>";
          $score_line .= "<td>".$new_score."</td>";
          // The captured text can carry a trailing "(...)" part; the float
          // cast takes the leading number.
          $diff_line .= sprintf("<td>%+.2f</td>",(float) $new_score - (float) $old_score);
      }
      print "<table border=1><tr>".$header."</tr><tr>".$score_line."</tr><tr>".$diff_line."</tr></table>";

      $precision_total_new = isset($info[1]["precision-1-total"]) ? $info[1]["precision-1-total"] : 0;
      $precision_total_old = isset($info[0]["precision-1-total"]) ? $info[0]["precision-1-total"] : 0;
      $recall_total_new = isset($info[1]["recall-1-total"]) ? $info[1]["recall-1-total"] : 0;
      printf("length-diff<br>%d (%+d)",$precision_total_new-$recall_total_new,$precision_total_new-$precision_total_old);

      // Close the cell, row and table opened at the top; the previous code
      // emitted opening <tr> and <table> tags here.
      print "</td></tr></table>";
  }
  /**
   * Print the control bar of the annotated-sentence comparison (sort order
   * and number of sentences shown) and then the sentences themselves via
   * bleu_diff_annotation().
   *
   * Reads $_GET['sort'] ("order", "better", "worse" or empty, which means
   * "order") and $_GET['count'] (number of sentences, default 5).
   */
  function bleu_diff() {
      $count = isset($_GET['count']) ? (int) $_GET['count'] : 0;
      if ($count <= 0) { $count = 5; }

      // Only known sort names are echoed into the generated javascript: links;
      // anything else is treated like an empty value.
      $sort_options = array('order','better','worse');
      $sort = isset($_GET['sort']) ? $_GET['sort'] : '';
      if (!in_array($sort,$sort_options,true)) {
          $sort = '';
      }

      print "<b>annotated sentences</b><br>";
      print "<font size=-1>sorted by ";
      foreach ($sort_options as $option) {
          // The active option is shown as plain text under its own name (the
          // old code printed "order" for all three), the others as links.
          $is_active = ($sort == $option) || ($sort == '' && $option == 'order');
          if ($is_active) {
              print "$option ";
          }
          else {
              print "<A HREF=\"javascript:diff('bleu','$option',$count)\">$option</A> ";
          }
      }

      print "display <A HREF=\"\">fullscreen</A> ";
      print "showing $count ";
      print "<A HREF=\"javascript:diff('bleu','" . $sort . "',5+$count)\">more</A> ";
      if ($count > 5) {
          print "<A HREF=\"javascript:diff('bleu','" . $sort . "',$count-5)\">less</A> ";
      }
      print "<A HREF=\"javascript:diff('bleu','" . $sort . "',9999)\">all</A> ";
      print "</font><BR>\n";

      bleu_diff_annotation();
  }
  /**
   * Print how many sentences are identical / same score / better / worse
   * between the two runs, then the first $_GET['count'] sentences (default 5)
   * with both system outputs colour-coded and the reference.
   *
   * Reads the "basic"/"bleu-annotation" file of run $id (index 0) and run
   * $id2 (index 1). Columns per line: 0 = sentence score, 1 = sentence id,
   * 2 = system output (space-separated "word|digit" tokens), 3 = reference.
   * $_GET['sort'] selects the order: "worse" and "better" sort by the score
   * difference, anything else keeps sentence-id order.
   */
  function bleu_diff_annotation() {
      global $set,$id,$id2,$dir;

      // load data
      $annotation = array();
      for($idx=0;$idx<2;$idx++) {
          $data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","bleu-annotation"));
          for($i=0;$i<count($data);$i++) {
              // split() no longer exists in PHP 7+; the delimiter is a literal tab.
              $item = explode("\t",rtrim($data[$i],"\r\n"));
              if (count($item) < 4) {
                  continue; // blank or truncated line
              }
              $annotation[$item[1]]["bleu$idx"] = $item[0];
              $annotation[$item[1]]["system$idx"] = $item[2];
              $annotation[$item[1]]["reference"] = $item[3];
              $annotation[$item[1]]["id"] = $item[1];
          }
      }
      // A sentence present in only one file gets neutral values for the other
      // run, so the code below can rely on all keys being set.
      foreach ($annotation as $key => $entry) {
          $annotation[$key] = $entry + array("bleu0" => 0, "bleu1" => 0, "system0" => "", "system1" => "");
      }

      // statistics (iterating the entries themselves, so sentence ids need not
      // be exactly 0..n-1)
      $identical=0; $same=0; $better=0; $worse=0;
      foreach ($annotation as $entry) {
          if ($entry["system1"] === $entry["system0"]) {
              $identical++;
          }
          else if ((float) $entry["bleu1"] == (float) $entry["bleu0"]) {
              $same++;
          }
          else if ((float) $entry["bleu1"] > (float) $entry["bleu0"]) {
              $better++;
          }
          else {
              $worse++;
          }
      }
      $sentences = count($annotation);
      print "<table><tr><td>identical</td><td>same</td><td>better</td><td>worse</td></tr>\n";
      printf("<tr><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>\n", $identical, $same, $better, $worse);
      // +.5 rounds to the nearest percent under %d; no data prints 0%.
      printf("<tr><td>%d%s</td><td>%d%s</td><td>%d%s</td><td>%d%s</td></tr></table>\n",
          $sentences ? $identical*100/$sentences+.5 : 0, '%',
          $sentences ? $same*100/$sentences+.5 : 0, '%',
          $sentences ? $better*100/$sentences+.5 : 0, '%',
          $sentences ? $worse*100/$sentences+.5 : 0, '%');

      // sort
      // The global is still written because the earlier implementation
      // published the chosen order there.
      global $sort;
      $sort = isset($_GET['sort']) ? $_GET['sort'] : '';
      if ($sort == '') {
          $sort = "order";
      }
      if ($sort == "worse" || $sort == "better") {
          // Precompute the sort key so the comparator is a plain top-level
          // function. A function declared inside this one would clash with the
          // other cmp() in this file and fail on a second call.
          foreach ($annotation as $key => $entry) {
              $delta = (float) $entry["bleu1"] - (float) $entry["bleu0"];
              $annotation[$key]["sort_key"] = ($sort == "worse") ? $delta : -$delta;
          }
          usort($annotation, '_bleu_diff_annotation_cmp');
      }
      else {
          // Sentence-id order, re-indexed from 0 for the display loop.
          ksort($annotation);
          $annotation = array_values($annotation);
      }

      $count = isset($_GET['count']) ? (int) $_GET['count'] : 0;
      if ($count <= 0) { $count = 5; }

      // display
      for($i=0;$i<$count && $i<count($annotation);$i++) {
          $line = $annotation[$i];
          $word_with_score1 = explode(" ",$line["system1"]);
          $word_with_score0 = explode(" ",$line["system0"]);
          // Same tokens with the "|digit" score removed.
          $word1 = explode(" ",preg_replace("/\|\d/","",$line["system1"]));
          $word0 = explode(" ",preg_replace("/\|\d/","",$line["system0"]));
          $matched_with_score = string_edit_distance($word_with_score0,$word_with_score1);
          $matched = string_edit_distance($word0,$word1);

          // Run $id2: drop the deletions, which refer to words of run $id only.
          print "<font size=-2>[".$id2."-".$line["id"].":".$line["bleu1"]."]</font> ";
          $matched1 = preg_replace('/D/',"",$matched);
          $matched_with_score1 = preg_replace('/D/',"",$matched_with_score);
          bleu_line_diff( $word_with_score1, $matched1, $matched_with_score1 );

          // Run $id: drop the insertions, which refer to words of run $id2 only.
          print "<font size=-2>[".$id."-".$line["id"].":".$line["bleu0"]."]</font> ";
          $matched0 = preg_replace('/I/',"",$matched);
          $matched_with_score0 = preg_replace('/I/',"",$matched_with_score);
          bleu_line_diff( $word_with_score0, $matched0, $matched_with_score0 );

          print "<font size=-2>[ref]</font> ".$line["reference"]."<hr>";
      }
  }

  /** usort() comparator: ascending by the precomputed "sort_key" entry. */
  function _bleu_diff_annotation_cmp($a, $b) {
      if ($a["sort_key"] == $b["sort_key"]) {
          return 0;
      }
      return ($a["sort_key"] < $b["sort_key"]) ? -1 : 1;
  }
  /**
   * Print one system output as a line of colour-coded <span>s.
   *
   * @param array  $word                tokens of the form "surface|score",
   *                                    where score indexes the colour tables
   *                                    (0..4)
   * @param string $matched             one edit-operation letter per token,
   *                                    computed on the surface forms; "M"
   *                                    greys the text out
   * @param string $matched_with_score  one edit-operation letter per token,
   *                                    computed on the scored tokens; "M"
   *                                    selects the light colour table
   *
   * A token without a usable score is printed without a background colour.
   */
  function bleu_line_diff( $word,$matched,$matched_with_score ) {
      $color = array("#FFC0C0","#FFC0FF","#C0C0FF","#C0FFFF","#C0FFC0");
      $lcolor = array("#FFF0F0","#FFF0FF","#F0F0FF","#F0FFFF","#F0FFF0");
      for($j=0;$j<count($word);$j++) {
          // split("\|", ...) no longer exists in PHP 7+; explode() on the
          // literal "|" yields the same pieces.
          $parts = explode("|", $word[$j]);
          $surface = $parts[0];
          $correct = isset($parts[1]) ? $parts[1] : "";

          $palette = (substr($matched_with_score,$j,1) == "M") ? $lcolor : $color;
          $style = isset($palette[$correct]) ? "background-color: ".$palette[$correct].";" : "";
          if (substr($matched,$j,1) == "M") {
              $style .= "color: #808080;";
          }
          // NOTE(review): $surface is printed unescaped, as before -- confirm
          // the annotation files already contain HTML-safe tokens.
          print "<span style=\"$style\">$surface</span> ";
      }
      print "<br>";
  }
  /**
   * Print the table of n-grams whose precision or recall changed most
   * between the two runs.
   *
   * @param string $type  "recall" selects the three-column recall layout; any
   *                      other value selects the precision layout. The value
   *                      is also part of the analysis file name
   *                      "n-gram-<type>.<order>".
   *                      NOTE(review): $type is echoed into the generated
   *                      links -- confirm callers only pass fixed values.
   *
   * Request parameters: order (n-gram order), sort ("ratio_worse",
   * "ratio_better", "abs_worse" or "abs_better"; anything else means
   * "ratio_worse" with smoothing 1), smooth (additive smoothing of the ratio
   * sorts) and count (rows shown, default 5).
   *
   * Columns per line of the analysis files: 0 = total, 1 = correct,
   * 2 = n-gram.
   */
  function ngram_diff($type) {
      global $set,$id,$id2,$dir;
      ini_set('memory_limit','1000000000'); // 1G for big files

      // load data
      // The order becomes part of a file name and of the generated markup, so
      // it is forced to an integer.
      $order = isset($_GET['order']) ? (int) $_GET['order'] : 0;
      $ngram_hash = array();
      for($idx=0;$idx<2;$idx++) {
          $data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","n-gram-$type.$order"));
          for($i=0;$i<count($data);$i++) {
              // split() no longer exists in PHP 7+; the delimiter is a literal tab.
              $item = explode("\t",rtrim($data[$i],"\r\n"));
              if (count($item) < 3) {
                  continue; // blank or truncated line
              }
              $ngram_hash[$item[2]]["total$idx"] = $item[0];
              $ngram_hash[$item[2]]["correct$idx"] = $item[1];
          }
          unset($data);
      }

      // sort option
      $sort = isset($_GET['sort']) ? $_GET['sort'] : '';
      $smooth = (isset($_GET['smooth']) && is_numeric($_GET['smooth'])) ? $_GET['smooth'] + 0 : 0;
      if (!in_array($sort,array('ratio_worse','ratio_better','abs_worse','abs_better'),true)) {
          $sort = 'ratio_worse';
          $smooth = 1;
      }

      // Kept from the earlier implementation, which lowered the reporting
      // level here. Missing counts are now defaulted explicitly below.
      error_reporting(E_ERROR);

      // sort index
      $ngram = array();
      $no_counts = array("total0" => 0, "correct0" => 0, "total1" => 0, "correct1" => 0);
      foreach ($ngram_hash as $n => $value) {
          // An n-gram seen in only one run counts as 0/0 in the other.
          $item = $value + $no_counts;
          $item["ngram"] = $n;
          if ($sort == "abs_worse" || $sort == "abs_better") {
              $change = (2*$item["correct1"] - $item["total1"])
                      - (2*$item["correct0"] - $item["total0"]);
          }
          else {
              $change = _ngram_diff_ratio($item["correct1"],$item["total1"],$smooth)
                      - _ngram_diff_ratio($item["correct0"],$item["total0"],$smooth);
          }
          // Ascending sort: "worse" puts the largest drop first, "better" the
          // largest gain.
          $item["index"] = ($sort == "abs_worse" || $sort == "ratio_worse") ? $change : -$change;
          $ngram[] = $item;
          unset($ngram_hash[$n]);
      }
      unset($ngram_hash);

      // sort
      usort($ngram, '_ngram_diff_cmp');

      // display
      $count = isset($_GET['count']) ? (int) $_GET['count'] : 0;
      if ($count <= 0) { $count = 5; }

      print "<B>$order-gram $type</B><br><font size=-1>sorted by<br>";
      if ($sort == "ratio_worse") {
          print "ratio worse ";
          print "smooth-$smooth ";
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_worse',$smooth+1)\">+</A> ";
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_worse',$smooth-1)\">-</A>,";
      }
      else {
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_worse',1)\">ratio worse</A>, ";
      }
      if ($sort == "abs_worse") {
          print "absolute worse, ";
      }
      else {
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'abs_worse',0)\">absolute worse</A>, ";
      }
      print "<br>";
      if ($sort == "ratio_better") {
          print "ratio better ";
          print "smooth-$smooth ";
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_better',$smooth+1)\">+</A> ";
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_better',$smooth-1)\">-</A>,";
      }
      else {
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_better',1)\">ratio better</A>, ";
      }
      if ($sort == "abs_better") {
          print "absolute better, ";
      }
      else {
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'abs_better',0)\">absolute better</A>, ";
      }

      print "<br>showing $count ";
      if ($count < 9999) {
          print "<A HREF=\"javascript:ngram_diff('$type',$order,$count+5,'$sort',$smooth)\">more</A> ";
          if ($count > 5) {
              print "<A HREF=\"javascript:ngram_diff('$type',$order,$count-5,'$sort',$smooth)\">less</A> ";
          }
          print "<A HREF=\"javascript:ngram_diff('$type',$order,9999,'$sort',$smooth)\">all</A> ";
      }
      else {
          print "<A HREF=\"javascript:ngram_diff('$type',$order,5,'$sort',$smooth)\">top5</A> ";
      }
      print "<br>\n";

      print "<table width=100%>\n";
      print "<tr><td>$order-gram</td>";
      if ($type == 'recall') {
          print "<td>&Delta;</td><td>ok</td><td>x</td></tr>\n";
      }
      else {
          print "<td align=right>&Delta;</td><td>ok</td><td align=right>&Delta;</td><td>x</td></tr>\n";
      }
      for($i=0;$i<$count && $i<count($ngram);$i++) {
          $line = $ngram[$i];
          print "<tr><td>".$line["ngram"]."</td>";
          $ok = $line["correct1"];
          $ok_diff = $ok - $line["correct0"];
          $wrong = $line["total1"] - $line["correct1"];
          $wrong_diff = $wrong - ($line["total0"]-$line["correct0"]);
          if ($type == 'recall') {
              printf("<td>%+d</td><td>%d</td><td>%d</td></tr>", $ok_diff,$ok,$wrong);
          }
          else {
              printf("<td align=right>%+d</td><td>(%d)</td><td align=right>%+d</td><td>(%d)</td></tr>", $ok_diff,$ok,$wrong_diff,$wrong);
          }
      }
      print "</table>\n";
  }

  /** Smoothed ratio (correct+smooth)/(total+smooth); 0 when the denominator is 0. */
  function _ngram_diff_ratio($correct,$total,$smooth) {
      $denominator = $total + $smooth;
      return ($denominator != 0) ? ($correct + $smooth) / $denominator : 0;
  }

  /** usort() comparator: ascending by the "index" entry. */
  function _ngram_diff_cmp($a, $b) {
      if ($a["index"] == $b["index"]) {
          return 0;
      }
      return ($a["index"] < $b["index"]) ? -1 : 1;
  }
  /**
   * Align two token lists by minimum edit distance and return the edit script.
   *
   * @param array $a  first token list (0-based, consecutive keys)
   * @param array $b  second token list (0-based, consecutive keys)
   * @return string   one letter per alignment step, in order:
   *                  "M" = tokens match, "S" = substitution,
   *                  "D" = token of $a without counterpart (deletion),
   *                  "I" = token of $b without counterpart (insertion).
   *                  Removing the "D"s gives one letter per token of $b,
   *                  removing the "I"s one letter per token of $a.
   *
   * Tokens are compared as strings, so numeric-looking tokens such as "1.0"
   * and "1" count as different. On equal cost a match or substitution wins
   * over a deletion, which wins over an insertion.
   */
  function string_edit_distance($a,$b) {
      $len_a = count($a);
      $len_b = count($b);
      $cost = array( array( 0 ) );
      $back = array( array( "" ) );

      // init boundaries: the first column can only be reached by deletions and
      // the first row only by insertions. Storing those back-pointers means
      // the path retrieval never reads an undefined cell.
      for($i=1;$i<=$len_a;$i++) {
          $cost[$i][0] = $i;
          $back[$i][0] = "D";
      }
      for($j=1;$j<=$len_b;$j++) {
          $cost[0][$j] = $j;
          $back[0][$j] = "I";
      }

      // exhaustive sed
      for($i=1;$i<=$len_a;$i++) {
          for($j=1;$j<=$len_b;$j++) {
              $match_cost = ((string) $a[$i-1] === (string) $b[$j-1]) ? 0 : 1;
              $c = $match_cost + $cost[$i-1][$j-1];
              $p = $match_cost ? "S" : "M";
              if ($cost[$i-1][$j]+1 < $c) {
                  $c = $cost[$i-1][$j]+1;
                  $p = "D";
              }
              if ($cost[$i][$j-1]+1 < $c) {
                  $c = $cost[$i][$j-1]+1;
                  $p = "I";
              }
              $cost[$i][$j] = $c;
              $back[$i][$j] = $p;
          }
      }

      // retrieve path, walking back from the bottom-right cell
      $i = $len_a;
      $j = $len_b;
      $path = "";
      while($i>0 || $j>0) {
          $step = $back[$i][$j];
          if ($step == "M" || $step == "S") {
              $i--; $j--;
          }
          else if ($step == "I") {
              $j--;
          }
          else {
              $i--;
          }
          $path = $step . $path;
      }
      return $path;
  }