PageRenderTime 38ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/strigi-0.7.7/libstreamanalyzer/include/strigi/indexwriter.h

#
C Header | 254 lines | 45 code | 7 blank | 202 comment | 0 complexity | d8b7d7a4115bceae2db8cb42821db3ec MD5 | raw file
Possible License(s): LGPL-2.0
  1. /* This file is part of Strigi Desktop Search
  2. *
  3. * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Library General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Library General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Library General Public License
  16. * along with this library; see the file COPYING.LIB. If not, write to
  17. * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  18. * Boston, MA 02110-1301, USA.
  19. */
  20. #ifndef STRIGI_INDEXWRITER_H
  21. #define STRIGI_INDEXWRITER_H
  22. #include <string>
  23. #include <vector>
  24. #include <strigi/strigiconfig.h>
  25. #include "streamanalyzer.h"
  26. namespace Strigi {
  27. template <class T> class StreamBase;
  28. class FieldRegister;
  29. class AnalysisResult;
  30. class RegisteredField;
  31. /**
  32. * Abstract class that provides write access to a Strigi index.
  33. *
  34. * Instances of the class should be obtained by calling the function
  35. * IndexManager::indexWriter() and should not be used from threads other
  36. * than the thread that called IndexManager::indexWriter().
  37. *
  38. * The lifecycle of an IndexWriter should be:
  39. * <pre>
  40. * - create indexwriter
  41. * for all streams {
  42. * - create an indexable
  43. * - add the indexwriter to it
  44. * - add a stream to the indexable (optional)
  45. * - add fields to indexable (optional)
  46. * - delete the indexable
  47. * }
  48. * - delete the indexwriter
  49. * </pre>
  50. *
  51. * Functions that have a default implementation do nothing by default,
  52. * except itemsInCache(), which returns 0.
  53. */
  54. class STREAMANALYZER_EXPORT IndexWriter {
  55. friend class AnalysisResult;
  56. public:
  57. /**
  58. * @brief Notifies the IndexWriter that a new stream is being analyzed.
  59. *
  60. * @param result the AnalysisResult for the stream that is being
  61. * analyzed (the parameter is left unnamed in this declaration)
  62. */
  63. virtual void startAnalysis(const AnalysisResult*) = 0;
  64. /**
  65. * @brief Add a fragment of text to the index.
  66. *
  67. * See AnalysisResult::addText() for more information.
  68. *
  69. * @param result the AnalysisResult for the object that is
  70. * being analyzed
  71. * @param text a pointer to a fragment of UTF-8 encoded text
  72. * @param length the length of the fragment
  73. */
  74. virtual void addText(const AnalysisResult* result, const char* text, int32_t length)=0;
  75. /**
  76. * @brief Add a string value for a field to the index.
  77. *
  78. * See AnalysisResult::addValue() for more information.
  79. *
  80. * @param result the AnalysisResult for the object that is
  81. * being analyzed
  82. * @param field description of the field
  83. * @param value string value of the field
  84. */
  85. virtual void addValue(const AnalysisResult* result, const RegisteredField* field,
  86. const std::string& value) = 0;
  87. /**
  88. * @brief Add a raw data value for a field to the index.
  89. *
  90. * See AnalysisResult::addValue() for more information.
  91. *
  92. * @param result the AnalysisResult for the object that is
  93. * being analyzed
  94. * @param field description of the field
  95. * @param data pointer to the data that forms the value of the field
  96. * @param size length of the data
  97. */
  98. virtual void addValue(const AnalysisResult* result, const RegisteredField* field,
  99. const unsigned char* data, uint32_t size) = 0;
  100. /**
  101. * @brief Add a signed 32-bit integer value for a field to the index.
  102. *
  103. * See AnalysisResult::addValue() for more information.
  104. *
  105. * @param result the AnalysisResult for the object that is
  106. * being analyzed
  107. * @param field description of the field
  108. * @param value signed integer value of the field
  109. */
  110. virtual void addValue(const AnalysisResult* result, const RegisteredField* field,
  111. int32_t value) = 0;
  112. /**
  113. * @brief Add an unsigned 32-bit integer value for a field to the index.
  114. *
  115. * See AnalysisResult::addValue() for more information.
  116. *
  117. * @param result the AnalysisResult for the object that is
  118. * being analyzed
  119. * @param field description of the field
  120. * @param value unsigned integer value of the field
  121. */
  122. virtual void addValue(const AnalysisResult* result, const RegisteredField* field,
  123. uint32_t value) = 0;
  124. /**
  125. * @brief Add a floating point (double) value for a field to the index.
  126. *
  127. * See AnalysisResult::addValue() for more information.
  128. *
  129. * @param result the AnalysisResult for the object that is
  130. * being analyzed
  131. * @param field description of the field
  132. * @param value floating point value of the field
  133. */
  134. virtual void addValue(const AnalysisResult* result, const RegisteredField* field,
  135. double value) = 0;
  136. /**
  137. * @brief Add a named string value (name/value pair) for a field to the index.
  138. *
  139. * See AnalysisResult::addValue() for more information.
  140. *
  141. * @param result the AnalysisResult for the object that is
  142. * being analyzed
  143. * @param field description of the field
  144. * @param name name that accompanies the value (exact semantics are not documented here; see AnalysisResult::addValue())
  145. * @param value string value of the field
  146. */
  147. virtual void addValue(const AnalysisResult* result, const RegisteredField* field,
  148. const std::string& name, const std::string& value) = 0;
  149. /**
  150. * @brief Notifies the IndexWriter that the analysis of this
  151. * stream is complete.
  152. *
  153. * @param result the AnalysisResult for the stream that has finished
  154. * being analyzed.
  155. */
  156. virtual void finishAnalysis(const AnalysisResult* result) = 0;
  157. /**
  158. * @brief Add a complete RDF triplet.
  159. *
  160. * @param subject the subject of the triplet
  161. * @param predicate the predicate of the triplet
  162. * @param object the object of the triplet
  163. **/
  164. virtual void addTriplet(const std::string& subject,
  165. const std::string& predicate, const std::string& object) = 0;
  166. public:
  167. virtual ~IndexWriter() {} // virtual so that a writer can be deleted through an IndexWriter pointer
  168. /**
  169. * @brief Flush the accumulated changes to disk. The default implementation does nothing.
  170. **/
  171. virtual void commit() { return; }
  172. /**
  173. * @brief Delete the entries with the given paths from the index.
  174. *
  175. * @param entries the paths of the files that should be deleted
  176. **/
  177. virtual void deleteEntries(const std::vector<std::string>& entries) = 0;
  178. /**
  179. * @brief Delete all indexed documents from the index.
  180. **/
  181. virtual void deleteAllEntries() = 0;
  182. /**
  183. * @brief Return the number of objects that are currently in the cache. The default implementation returns 0.
  184. **/
  185. virtual int itemsInCache() { return 0; }
  186. /**
  187. * @brief Optimize the index.
  188. *
  189. * This can be computationally intensive and may cause the index to
  190. * temporarily use twice as much disk space. The default implementation does nothing.
  191. **/
  192. virtual void optimize() {}
  193. /**
  194. * @brief Initialise the writer data of the fields.
  195. *
  196. * This performs initialization on the writer data of the fields in
  197. * @p fieldRegister. When called, the writer data is set to a 0-pointer
  198. * for every field.
  199. *
  200. * For example, this function may create an object to store the value for
  201. * the field. The following example is based on code from xmlindexer:
  202. * @code
  203. * map<string, RegisteredField*>::const_iterator i;
  204. * map<string, RegisteredField*>::const_iterator end = fieldRegister.fields().end();
  205. * for (i = fieldRegister.fields().begin(); i != end; ++i) {
  206. * Tag* tag = new Tag();
  207. * const string name(i->first);
  208. * tag->open = " <value name='" + name + "'>";
  209. * tag->close = "</value>\n";
  210. * i->second->setWriterData(tag);
  211. * }
  212. * @endcode
  213. *
  214. * This function must be called on a fieldRegister before any of its
  215. * fields are passed to any addValue() function.
  216. * The default implementation does nothing.
  217. * @param fieldRegister the FieldRegister to initialize
  218. */
  219. virtual void initWriterData(const Strigi::FieldRegister& /*fieldRegister*/){
  220. }
  221. /**
  222. * @brief Clean up the writer data of the fields.
  223. *
  224. * This cleans up the writer data set with initWriterData() and/or addValue().
  225. * Typically, this will mean deleting the memory used by the writer data:
  226. * @code
  227. * map<string, RegisteredField*>::const_iterator i;
  228. * map<string, RegisteredField*>::const_iterator end = fieldRegister.fields().end();
  229. * for (i = fieldRegister.fields().begin(); i != end; ++i) {
  230. * delete static_cast<Tag*>(i->second->writerData());
  231. * }
  232. * @endcode
  233. *
  234. * No further calls may be made to addValue() with any Strigi::RegisteredField
  235. * that has already been passed to this function in a Strigi::FieldRegister.
  236. *
  237. * This function may only be called once for each corresponding call of
  238. * initWriterData() with the same Strigi::FieldRegister, and this must take
  239. * place after the call to initWriterData().
  240. * The default implementation does nothing.
  241. * @param fieldRegister the FieldRegister whose fields' writer data should be cleaned up
  242. */
  243. virtual void releaseWriterData(const Strigi::FieldRegister &/*fieldRegister*/) {
  244. }
  245. };
  246. } // end namespace Strigi
  247. #endif