PageRenderTime 60ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/java/src/uk/ac/strath/cis/spd/buglanguage/evaluate/CombineEvaluation.java

https://bitbucket.org/el_loserio/buglocalisation
Java | 226 lines | 192 code | 28 blank | 6 comment | 18 complexity | 39cda826c16161893cfb80d82ea55552 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, GPL-3.0
  1. package uk.ac.strath.cis.spd.buglanguage.evaluate;
  2. /*
  3. * See LICENCE_BSD for licensing information.
  4. *
  5. * Copyright Steven Davies 2012
  6. */
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import weka.classifiers.functions.LinearRegression;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
  30. public class CombineEvaluation {
  31. private static final DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  32. private static final ProcessBuilder gitHash = new ProcessBuilder("git", "log", "-1", "--pretty=format:%H");
  33. private static final ProcessBuilder gitStats = new ProcessBuilder("git", "diff", "--numstat");
  34. private static final List<String> names = Arrays.asList("Tfidf", "bug_count", "coverage", "similarity", "stack");
  35. private List<File> inputFiles;
  36. private File resultsDir, relevant;
  37. private long seed;
  38. private Random random;
  39. public CombineEvaluation(String config) throws Exception {
  40. Properties props = new Properties();
  41. props.load(new FileReader(config));
  42. seed = Long.parseLong(props.getProperty("seed"));
  43. random = new Random(seed);
  44. relevant = new File(props.getProperty("results"), "relevant");
  45. resultsDir = new File(props.getProperty("results"), "combined");
  46. resultsDir.mkdir();
  47. FileUtils.cleanDirectory(resultsDir);
  48. inputFiles = new ArrayList<File>();
  49. for(String name: names){
  50. inputFiles.add(new File(props.getProperty("results"), name));
  51. }
  52. }
  53. private String[] nextLine(Iterator<String> iterator) {
  54. if(iterator == null){
  55. return null;
  56. }
  57. String line;
  58. do{
  59. line = iterator.next();
  60. } while(line.startsWith("#"));
  61. return line.trim().split("\\s+");
  62. }
  63. public void evaluate() throws Exception {
  64. FastVector attributes = new FastVector();
  65. for(String name: names){
  66. attributes.addElement(new Attribute(name));
  67. }
  68. attributes.addElement(new Attribute("relevant"));
  69. Instances instances = new Instances("bug", attributes, 0);
  70. instances.setClassIndex(instances.numAttributes() - 1);
  71. LinearRegression classifier = new LinearRegression();
  72. boolean trained = false;
  73. String[] bugs = relevant.list();
  74. Arrays.sort(bugs, new Comparator<String>() {
  75. @Override
  76. public int compare(String o1, String o2) {
  77. return Integer.valueOf(o1).compareTo(Integer.valueOf(o2));
  78. }
  79. });
  80. for(String bugId: bugs){
  81. Iterator<String> relevantMethods = FileUtils.lineIterator(new File(relevant, bugId));
  82. List<Iterator<String>> allValues = new ArrayList<Iterator<String>>();
  83. for(File inputFile: inputFiles){
  84. try{
  85. allValues.add(FileUtils.lineIterator(new File(inputFile, bugId)));
  86. }
  87. catch(FileNotFoundException e){
  88. System.out.println(e.getMessage());
  89. }
  90. }
  91. PrintWriter results = new PrintWriter(FileUtils.openOutputStream(new File(resultsDir, bugId)));
  92. try{
  93. writeMetaData(results);
  94. while(relevantMethods.hasNext()){
  95. String[] parts = nextLine(relevantMethods);
  96. String methodId = parts[0];
  97. double relevant = Double.parseDouble(parts[1]);
  98. Instance instance = new Instance(instances.numAttributes());
  99. instance.setDataset(instances);
  100. for(int i = 0; i < allValues.size(); i++){
  101. String[] parts2 = nextLine(allValues.get(i));
  102. if(parts2 == null){
  103. instance.setValue(i, 0);
  104. }
  105. else{
  106. assert parts2[0] == methodId;
  107. try{
  108. instance.setValue(i, Double.parseDouble(parts2[1]));
  109. }
  110. catch(NumberFormatException e){
  111. System.out.println(e.getMessage());
  112. }
  113. }
  114. }
  115. instance.setClassValue(relevant);
  116. double result = 0;
  117. if(trained){
  118. result = classifier.distributionForInstance(instance)[0];
  119. }
  120. print(results, methodId, result);
  121. if(relevant > 0 || random.nextDouble() <= 0.01){
  122. instances.add(instance);
  123. }
  124. }
  125. }
  126. finally{
  127. results.close();
  128. }
  129. if(instances.numInstances() > 0){
  130. trained = true;
  131. classifier.buildClassifier(instances);
  132. System.out.println(classifier.toString().replaceAll("\\s+", " "));
  133. }
  134. }
  135. }
  136. protected void writeMetaData(PrintWriter writer) {
  137. print(writer, "#", "Attributes", 7);
  138. print(writer, "#", "Source", getClass().getCanonicalName());
  139. print(writer, "#", "Timestamp", dateFormat.format(new Date()));
  140. print(writer, "#", "Version", getGitVersion());
  141. print(writer, "#", "Changes", getGitChanges());
  142. print(writer, "#", "Database", getDbVersion());
  143. print(writer, "#", "Seed", seed);
  144. }
  145. private String getGitVersion() {
  146. try{
  147. Process process = gitHash.start();
  148. process.waitFor();
  149. BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
  150. return reader.readLine();
  151. }
  152. catch(Exception e){
  153. e.printStackTrace();
  154. return "Unavailable";
  155. }
  156. }
  157. private String getGitChanges() {
  158. try{
  159. Process process = gitStats.start();
  160. process.waitFor();
  161. BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
  162. String line = reader.readLine();
  163. int[] changes = new int[3];
  164. while(line != null){
  165. String[] parts = line.split("\\s+", 3);
  166. if(!parts[2].startsWith("data/")){
  167. try{
  168. changes[0] += Integer.parseInt(parts[0]);
  169. changes[1] += Integer.parseInt(parts[1]);
  170. }
  171. catch(NumberFormatException e){
  172. // Do nothing
  173. }
  174. changes[2] += 1;
  175. }
  176. line = reader.readLine();
  177. }
  178. if(changes[2] == 0){
  179. return "No changes";
  180. }
  181. return String.format("%s files changed, %s insertions(+), %s deletions(-)", changes[2], changes[0], changes[1]);
  182. }
  183. catch(Exception e){
  184. e.printStackTrace();
  185. return "Unavailable";
  186. }
  187. }
  188. private String getDbVersion() {
  189. return "NA";
  190. }
  191. private void print(PrintWriter writer, Object... objects) {
  192. writer.println(StringUtils.join(objects, "\t"));
  193. }
  194. public static void main(String[] args) throws Exception {
  195. CombineEvaluation evaluation = new CombineEvaluation(args[0]);
  196. evaluation.evaluate();
  197. }
  198. }