/edu/uncc/parsets/data/old/DataDimension.java

https://code.google.com/p/parsets/ · Java · 186 lines · 120 code · 32 blank · 34 comment · 20 complexity · d5dfdcccb56062733bc5a041be6d70b1 MD5 · raw file

  1. package edu.uncc.parsets.data.old;
  2. import java.util.ArrayList;
  3. import java.util.List;
  4. import java.util.Map;
  5. import java.util.TreeMap;
  6. import edu.uncc.parsets.data.DataType;
  7. import edu.uncc.parsets.util.PSLogging;
  8. /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
  9. * Copyright (c) 2009, Robert Kosara, Caroline Ziemkiewicz,
  10. * and others (see Authors.txt for full list)
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or without
  14. * modification, are permitted provided that the following conditions are met:
  15. *
  16. * * Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. * * Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. * * Neither the name of UNC Charlotte nor the names of its contributors
  22. * may be used to endorse or promote products derived from this software
  23. * without specific prior written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY ITS AUTHORS ''AS IS'' AND ANY
  26. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  27. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  28. * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
  29. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  30. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  32. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  33. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  34. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35. \* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
  36. public class DataDimension {
  37. // * The key (internal name) for this dimension
  38. private String dimensionKey;
  39. private DataType type;
  40. private String name;
  41. private Map<String, String> categoryNames = new TreeMap<String, String>();
  42. // * The map of category keys to integers, for faster lookup when parsing
  43. private Map<String, Integer> categoryMap = new TreeMap<String, Integer>();
  44. private ArrayList<String> categoryKeys = new ArrayList<String>();
  45. private Map<String, Integer> occurrenceCounts = new TreeMap<String, Integer>();
  46. private List<String> values = new ArrayList<String>(100);
  47. // private boolean hasMetaData = false;
  48. private String dbHandle;
  49. public DataDimension(String key, DataType dataType) {
  50. dimensionKey = key;
  51. type = dataType;
  52. }
  53. public void addCategory(String key, String name) {
  54. categoryNames.put(key, name);
  55. categoryKeys.add(key);
  56. categoryMap.put(key, categoryKeys.size());
  57. }
  58. public void addValue(String value) {
  59. if (values.size() < 100)
  60. values.add(value);
  61. Integer num = occurrenceCounts.get(value);
  62. if (num == null)
  63. num = Integer.valueOf(1);
  64. else
  65. num = Integer.valueOf(num+1);
  66. occurrenceCounts.put(value, num);
  67. switch(type) {
  68. case numerical:
  69. try {
  70. Float.parseFloat(value);
  71. } catch (Exception e) {
  72. type = DataType.categorical;
  73. }
  74. break;
  75. case categorical:
  76. if (occurrenceCounts.size() > 100)
  77. type = DataType.textual;
  78. break;
  79. }
  80. if (type != DataType.textual)
  81. // if (!hasMetaData)
  82. if (!categoryMap.containsKey(value))
  83. addCategory(value, value);
  84. }
  85. public int getOccurrenceCount(String key) {
  86. Integer count = occurrenceCounts.get(key);
  87. if (count != null)
  88. return count;
  89. else {
  90. PSLogging.logger.warn("No occurrance count found for '"+key+"'");
  91. return 0;
  92. }
  93. }
  94. public List<String> getValues() {
  95. return values;
  96. }
  97. public String getName() {
  98. if (name != null)
  99. return name;
  100. else
  101. return dimensionKey;
  102. }
  103. public String getKey() {
  104. return dimensionKey;
  105. }
  106. public String toString() {
  107. return dimensionKey;
  108. }
  109. public String getCategoryName(int categoryValue) {
  110. if (type != DataType.textual)
  111. return categoryNames.get(categoryKeys.get(categoryValue));
  112. else {
  113. for (Map.Entry<String, Integer> e : categoryMap.entrySet()) {
  114. if (e.getValue() == categoryValue)
  115. return e.getKey();
  116. }
  117. return null;
  118. }
  119. }
  120. public void setCategoryName(int categoryValue, String name) {
  121. categoryNames.put(categoryKeys.get(categoryValue), name);
  122. }
  123. public String getCategoryKey(int categoryValue) {
  124. return categoryKeys.get(categoryValue);
  125. }
  126. public int getNumCategories() {
  127. return categoryKeys.size();
  128. }
  129. public DataType getDataType() {
  130. return type;
  131. }
  132. public void setDataType(DataType type) {
  133. this.type = type;
  134. }
  135. public void setName(String newName) {
  136. name = newName;
  137. // hasMetaData = true;
  138. }
  139. public int getNumForKey(String key) {
  140. // TODO: Debug code that needs to go before release
  141. if (categoryMap.containsKey(key))
  142. return categoryMap.get(key);
  143. else {
  144. System.err.println("Can't find "+key+" in dimension "+getName());
  145. return 0;
  146. }
  147. }
  148. public void setHandle(String dimHandle) {
  149. dbHandle = dimHandle;
  150. }
  151. public String getHandle() {
  152. return dbHandle;
  153. }
  154. }