PageRenderTime 58ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/src/edu/psu/chemxseer/structure/setcover/IO/Input_MemBucket.java

https://github.com/Santa827/Chemxseer_subSearch
Java | 106 lines | 75 code | 14 blank | 17 comment | 8 complexity | a04ad59c672279a00b763d9504cdfc97 MD5 | raw file
  1. package edu.psu.chemxseer.structure.setcover.IO;
  2. import java.util.ArrayList;
  3. import java.util.Arrays;
  4. import java.util.List;
  5. import edu.psu.chemxseer.structure.setcover.featureGenerator.IFeatureSetConverter;
  6. import edu.psu.chemxseer.structure.setcover.sets.CoverSet_FeatureWrapper;
  7. import edu.psu.chemxseer.structure.setcover.sets.CoverSet_FeatureWrapper2;
  8. import edu.psu.chemxseer.structure.subsearch.Impl.indexfeature.PostingFeaturesMultiClass;
  9. /**
  10. * In-memory implementation of the buckets:
  11. * @author dayuyuan
  12. *
  13. */
  14. public class Input_MemBucket implements IInputBucket{
  15. private Input_Mem[] buckets; // the files saving the input of the sets
  16. private double logP; // logP is for segmentation
  17. public static Input_MemBucket newInstance (PostingFeaturesMultiClass postingFeatures,
  18. IFeatureSetConverter converter, String filePrefix, double p){
  19. //1. Pre-process: sort the features according to its size
  20. CoverSet_FeatureWrapper2[] features = postingFeatures.toWrapper(converter);
  21. Arrays.sort(features);
  22. //2. Doing the Segmentation
  23. //2.1 find the min * max of K
  24. double logP = Math.log(p);
  25. int smallestK = (int) (Math.log(features[0].getGain())/logP);
  26. int largestK = (int)(Math.log(features[0].getGain())/logP);
  27. List<CoverSet_FeatureWrapper>[] inputs = new ArrayList[largestK+1];
  28. for(int i = 0; i< inputs.length; i++){
  29. inputs[i] = new ArrayList<CoverSet_FeatureWrapper>();
  30. }
  31. int power = (int)(Math.exp(logP*(smallestK+1)));
  32. for(int fID = 0, i = smallestK; i< inputs.length; ){
  33. if(features[fID].getGain() > power){
  34. power = (int) (power * p);
  35. i++;
  36. continue; // continue the iteration
  37. }
  38. else {
  39. inputs[i].add(new CoverSet_FeatureWrapper(features[fID]));
  40. fID++;
  41. }
  42. }
  43. return new Input_MemBucket(logP, inputs);
  44. }
  45. public Input_MemBucket(double logP, List<CoverSet_FeatureWrapper>[] inputs){
  46. this.logP = logP;
  47. this.buckets = new Input_Mem[inputs.length];
  48. for(int i = 0; i< buckets.length; i++){
  49. if(inputs[i]!=null)
  50. buckets[i] = Input_Mem.newInstance(inputs[i]);
  51. else continue;
  52. }
  53. }
  54. @Override
  55. public int getBucketCount() {
  56. return this.buckets.length;
  57. }
  58. @Override
  59. public boolean append(CoverSet_FeatureWrapper feature, int gain) {
  60. // First find what is the "level" the "gain" function should be in
  61. int bucketID = this.getBucket(gain);
  62. if(buckets[bucketID]!=null){
  63. buckets[bucketID].appendFeature(feature);
  64. return true;
  65. }
  66. else{
  67. buckets[bucketID] = Input_Mem.newEmptyInstance();
  68. buckets[bucketID].appendFeature(feature);
  69. return true;
  70. }
  71. }
  72. /**
  73. * Given the gain function, calculate the bucket the set
  74. * should be assigned to
  75. * @param gain
  76. * @return
  77. */
  78. private int getBucket(int gain){
  79. int result = (int)( Math.log(gain)/ logP);
  80. return result;
  81. }
  82. @Override
  83. public int getLowerBound(int bID) {
  84. // TODO Auto-generated method stub
  85. return 0;
  86. }
  87. @Override
  88. public IInputSequential getBucketInput(int bID) {
  89. // TODO Auto-generated method stub
  90. return null;
  91. }
  92. }