/src/main/java/stream/quantiles/SumQuantiles.java
https://bitbucket.org/cbockermann/stream-analysis · Java · 163 lines · 116 code · 30 blank · 17 comment · 11 complexity · c8fbcfddfc490289cfe2a0b7284c9069 MD5 · raw file
- package stream.quantiles;
- import java.io.Serializable;
- import java.util.List;
- import java.util.Map;
- import java.util.concurrent.ConcurrentHashMap;
- import java.util.concurrent.CopyOnWriteArrayList;
- import stream.quantiles.impl.AbstractQuantileLearner;
- public class SumQuantiles extends AbstractQuantileLearner {
- private int slidingWindowSize = 5000;
- private int maxBucketCount = 10;
- private int elementPerBucket = slidingWindowSize / maxBucketCount;
- private int biggestSeenElement = 0;
- Bucket newestBucket = null;
- final List<Bucket> buckets = new CopyOnWriteArrayList<Bucket>();
- public SumQuantiles(int slidingWindowSize, int bucketCount) {
- this.slidingWindowSize = slidingWindowSize;
- this.maxBucketCount = bucketCount;
- elementPerBucket = slidingWindowSize / maxBucketCount;
- addNewBucket();
- }
- /**
- * @see stream.service.Service#reset()
- */
- @Override
- public void reset() throws Exception {
- buckets.clear();
- }
- public void init() {
- }
- private void addNewBucket() {
- Bucket newBucket = new Bucket();
- buckets.add(newBucket);
- newestBucket = newBucket;
- deleteExcessiveBuckets();
- // System.out.println("new");
- }
- /**
- * delete oldest {@link Bucket} while we have too many of them.
- */
- private void deleteExcessiveBuckets() {
- while (buckets.size() > maxBucketCount) {
- buckets.remove(0);
- }
- }
- public Double getQuantile(Double phi) {
- int overallElementCount = 0;
- for (Bucket bucket : buckets) {
- overallElementCount += bucket.getElementCount();
- }
- int wantedRank = (int) ((double) overallElementCount * phi);
- int sum = 0;
- // System.out.println("--------------------------------------" );
- // System.out.println("ElementCount " + overallElementCount);
- // System.out.println("wantedRank " + wantedRank);
- for (int i = 0; i < biggestSeenElement; i++) {
- long predict = getAllBucketPrediction(i);
- sum += predict;
- // System.out.println(sum);
- if (sum >= wantedRank) {
- return (double) i;
- }
- }
- return 0.0;
- }
- private int getAllBucketPrediction(int item) {
- int prediction = 0;
- for (Bucket bucket : buckets) {
- prediction += bucket.predict(item);
- }
- return prediction;
- }
- public void printBuckets() {
- for (Bucket bucket : buckets) {
- System.out.println(bucket);
- }
- }
- /**
- * @see edu.udo.cs.pg542.util.DataStreamProcessor#process(java.lang.Object)
- */
- @Override
- public void learn(Double item) {
- biggestSeenElement = Math.max(biggestSeenElement, item.intValue());
- newestBucket.learn(item);
- if (newestBucket.isFull()) {
- addNewBucket();
- }
- }
- private class Bucket implements Serializable {
- /**
- *
- */
- private static final long serialVersionUID = -2211156505869843563L;
- int elementCount = 0;
- Map<String, Integer> counterMap;
- public Bucket() {
- counterMap = new ConcurrentHashMap<String, Integer>();
- }
- public long predict(int item) {
- String asString = ((Integer) item).toString();
- if (counterMap.containsKey(asString)) {
- return counterMap.get(asString);
- }
- return 0;
- }
- public void learn(Double item) {
- int value = item.intValue();
- String asString = ((Integer) value).toString();
- if (counterMap.containsKey(asString)) {
- int counter = counterMap.get(asString);
- counter++;
- counterMap.put(asString, counter);
- } else {
- counterMap.put(asString, 1);
- }
- elementCount++;
- }
- public int getElementCount() {
- return elementCount;
- }
- public boolean isFull() {
- return elementCount >= elementPerBucket;
- }
- @Override
- public String toString() {
- System.out.println("--------------------------------------");
- String out = "Bucket: \n";
- for (String key : counterMap.keySet()) {
- out = out + key + " " + counterMap.get(key) + "\n";
- }
- return out;
- }
- }
- }