PageRenderTime 304ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/custom_processing/infinit.e.hadoop.examples/src/examples/SourceSumXML.java

https://github.com/IKANOW/Infinit.e
Java | 96 lines | 49 code | 13 blank | 34 comment | 3 complexity | f193bf248ca5848f604f8fd9df397f45 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. /*******************************************************************************
  2. * Copyright 2012, The Infinit.e Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. ******************************************************************************/
  16. package examples;
  17. import java.io.IOException;
  18. import org.apache.hadoop.io.*;
  19. import org.apache.hadoop.mapreduce.Mapper;
  20. import org.apache.hadoop.mapreduce.Reducer;
  21. import org.apache.hadoop.util.ToolRunner;
  22. import org.bson.BSONObject;
  23. import com.ikanow.infinit.e.data_model.store.document.DocumentPojo;
  24. import com.mongodb.BasicDBObject;
  25. import com.mongodb.hadoop.util.MongoTool;
  26. public class SourceSumXML extends MongoTool
  27. {
  28. public static class TokenizerMapper extends Mapper<Object, BSONObject, Text, IntWritable> {
  29. private final static IntWritable one = new IntWritable( 1 );
  30. private final Text word = new Text();
  31. public void map( Object key, BSONObject value, Context context ) throws IOException, InterruptedException
  32. {
  33. //Argument access examples:
  34. //1. Just a string argument
  35. /*String args = context.getConfiguration().get("arguments");
  36. word.set(args);
  37. context.write(word,one);*/
  38. //2. Argument is some json we deserialize (i.e. argument is {"word":"something"} )
  39. //String args = context.getConfiguration().get("arguments");
  40. //argumentjson aj = new Gson().fromJson(args, argumentjson.class);
  41. //word.set(aj.word);
  42. //context.write(word,one);
  43. // Document access methods:
  44. // 1. Pure BSON version
  45. //Object source = value.get("source");
  46. // 2. BSON with safe field names
  47. //Object source = value.get(DocumentPojo.source_);
  48. // 3. Data model abstraction
  49. //DocumentPojo doc = DocumentPojo.fromDb( (BasicDBObject) value, DocumentPojo.class );
  50. //String source = doc.getSource();
  51. // 4. Data model abstraction with faster deserialization
  52. value.removeField("associations");
  53. value.removeField("entities");
  54. value.removeField("metadata");
  55. DocumentPojo doc = DocumentPojo.fromDb( (BasicDBObject) value, DocumentPojo.class );
  56. String source = doc.getSource();
  57. if ( source != null )
  58. {
  59. word.set(source.toString());
  60. context.write( word, one);
  61. }
  62. }
  63. }
  64. public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable>
  65. {
  66. private final IntWritable result = new IntWritable();
  67. public void reduce( Text key, Iterable<IntWritable> values, Context context )
  68. throws IOException, InterruptedException
  69. {
  70. int sum = 0;
  71. for ( final IntWritable val : values )
  72. {
  73. sum += val.get();
  74. }
  75. result.set( sum );
  76. context.write( key, result );
  77. }
  78. }
  79. public static void main( String[] args ) throws Exception{
  80. final int exitCode = ToolRunner.run( new SourceSumXML(), args );
  81. System.exit( exitCode );
  82. }
  83. }