PageRenderTime 1736ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 1ms

/CatRec.h

https://github.com/gigablast/open-source-search-engine
C Header | 527 lines | 44 code | 55 blank | 428 comment | 0 complexity | a25b7947fffbe67a17968015e8abefa0 MD5 | raw file
Possible License(s): Apache-2.0
  1. // Matt Wells, copyright Jul 201
  2. // . the record retrieved from tagdb
  3. // . used for describing a site
  4. // . can parse out record from our rdb or from a network msg
  5. // . has siteUrl and filenum of the file that holds the Xml that has the
  6. // parsing rules and quotas for docs in that site
  7. // . we have the fields you can use at the bottom of this file
  8. #ifndef _CATREC_H_
  9. #define _CATREC_H_
  10. #include "Conf.h"
  11. #include "Xml.h"
  12. #include "RdbList.h"
  13. #include "Tagdb.h"
  14. #include "Categories.h"
  15. #include "Lang.h"
  16. #include "Tagdb.h"
  17. #include "Catdb.h"
  18. #define MAX_IND_CATIDS 1024
  19. #define MAX_SITE_TYPES 12
  20. // url, catids, indirect catids, numCatids, numIndCatids, filenum (parenthesized so the sum survives use inside a larger expression)
  21. #define CATREC_BUF_SIZE (MAX_URL_LEN + MAX_CATIDS*4 + 9)
  22. class CatRec { // site record from tagdb/catdb: holds the site url, filenum and category ids
  23. public:
  24. // reset/construct/destroy (old note: "these just set m_xml to NULL", but m_xml is commented out below)
  25. void reset() ;
  26. CatRec();
  27. ~CatRec();
  28. // . extract the site url for "url"
  29. // . extract the filenum of the file that holds the xml we want
  30. // . returns false and sets errno on error
  31. // . if rec is NULL we use the default rec for this collection (NOTE(review): "rec" presumably means "data" -- confirm)
  32. bool set ( Url *url, char *data,int32_t dataSize,
  33. bool gotByIp ); // , char rdbId = RDB_TAGDB );
  34. // we're empty if m_xml is NULL
  35. //bool isEmpty() { return (! m_xml); };
  36. // . used by Msg9 to make a CatRec to add
  37. // . serializes filenum/site into our m_data/m_dataSize
  38. // . returns false and sets errno on error
  39. /*
  40. bool set ( Url *site , char *coll , int32_t collLen , int32_t filenum ,
  41. char version , char rdbId = RDB_TAGDB , int32_t timeStamp = 0,
  42. char *comment = NULL, char *username = NULL,
  43. int32_t *catids = NULL, unsigned char numCatids = 0,
  44. unsigned char spamBits = 0, char siteQuality = 0,
  45. char adultLevel = 0,
  46. SiteType *siteTypes = NULL,
  47. uint8_t numTypes = 0,
  48. SiteType *langs = NULL,
  49. uint8_t numLangs = 0);
  50. */
  51. bool set ( Url *site , int32_t filenum ,
  52. int32_t *catids = NULL, unsigned char numCatids = 0 );
  53. //Xml *getXml() { return m_xml; };
  54. //bool set ( int32_t filenum ) ;
  55. // . this method just sets the filenum, version, url and url-len from
  56. // data-pointer "data"
  57. // . this method is written as an alternative to the above set methods
  58. // Useful if the caller is interested just in the url and url len
  59. // saves time
  60. bool set (char *data, int32_t dataSize);//, char rdbId );
  61. // set the indirect catids
  62. void setIndirectCatids ( int32_t *indCatids, int32_t numIndCatids );
  63. // . did this url have an entry in tagdb?
  64. // . we need to know this because if it didn't it will have the default rec
  65. // . Msg16 will override Url::isSpam() if this record is not default
  66. // . Msg25 will also not bother checking for link bans via Msg18
  67. bool hadRec() { return m_hadRec; };
  68. // . did we get it by ip? (if not, we got it by canonical domain name)
  69. // . if we got it by IP and it was banned, admin has the option to
  70. // tell gigablast to automatically add the domain name as banned
  71. // to tagdb in Msg14.cpp
  72. bool gotByIp() { return m_gotByIp; };
  73. // get the record itself (just templateNum/site/coll)
  74. char *getData ( ) { return m_data; };
  75. int32_t getDataSize ( ) { return m_dataSize; };
  76. // along with coll/collLen identifies a unique xml file
  77. //int32_t getFilenum ( ) { return m_filenum; };
  78. //int32_t getRuleset ( ) { return m_filenum; };
  79. // . these should both be NULL terminated
  80. // . they both reference into the data contained in m_list
  81. // or m_buf if the list doesn't have a site record for us
  82. Url *getSite ( ) { return &m_site; };
  83. //char *getCollection ( ) { return m_coll; };
  84. //int32_t getCollectionLen ( ) { return m_collLen; };
  85. /*
  86. char* printFormattedRec(char* p);
  87. void printFormattedRec(SafeBuf *sb);
  88. char* printXmlRec (char* p);
  89. void printXmlRec ( SafeBuf *sb );
  90. //status of manually set bits.
  91. bool isSpamUnknown() { return m_spamBits == SPAM_UNKNOWN; }
  92. bool isSpam() { return m_spamBits == SPAM_BIT; }
  93. bool isNotSpam() { return m_spamBits == NOT_SPAM; }
  94. char* getSpamStr();
  95. unsigned char getSpamStatus() { return m_spamBits; }
  96. //
  97. bool isRatingUnknown() { return m_adultLevel == NOT_RATED; }
  98. bool isAdultButNotPorn() { return m_adultLevel == RATED_R; }
  99. bool isPorn() { return m_adultLevel == RATED_X; }
  100. bool isKidSafe() { return m_adultLevel == RATED_G; }
  101. char* getAdultStr();
  102. char *getPubDateFmtStr();
  103. int32_t getTimeStamp() { return m_timeStamp; }
  104. char *getComment() { return m_comment; }
  105. char *getUsername() { return m_username; }
  106. char getSiteQuality() { return m_siteQuality; }
  107. int32_t getNumSiteTypes () { return m_numTypes; }
  108. int32_t getNumSiteLangs () { return m_numLangs; }
  109. SiteType *getSiteTypes () { return m_siteTypes; }
  110. SiteType *getSiteLangs () { return m_siteLangs; }
  111. uint32_t getScoreForType(uint8_t type);
  112. // . mod functions
  113. // . pain in the butt cuz we gotta change m_data/m_dataSize buffer too
  114. void addSiteType (uint8_t type, uint32_t score ) ;
  115. void setFilenum (int32_t newFilenum );
  116. // . [n0,n1] constitute an xml node range in "xml"
  117. // . "len" is the length of another node's data in another xml doc
  118. // . gets the scoreWeight from docQuality and a node's dataLen
  119. // . 2nd one gets the maxScore from docQuality
  120. int32_t getScoreWeightFromQuality ( int32_t n0, int32_t n1, int32_t quality );
  121. int32_t getScoreWeightFromQuality2( int32_t quality );
  122. int32_t getMaxScoreFromQuality ( int32_t n0, int32_t n1, int32_t quality );
  123. int32_t getMaxLenFromQuality ( int32_t n0, int32_t n1, int32_t quality );
  124. //bool hasMaxCountFromQualityTag ( int32_t n0, int32_t n1 ) ;
  125. //int32_t getMaxCountFromQuality ( int32_t n0, int32_t n1, int32_t quality ) ;
  126. int32_t getScoreWeightFromLen ( int32_t n0, int32_t n1, int32_t len );
  127. int32_t getScoreWeightFromLen2 ( int32_t len );
  128. int32_t getScoreWeightFromNumWords( int32_t n0, int32_t n1, int32_t len );
  129. int32_t getMaxScoreFromLen ( int32_t n0, int32_t n1, int32_t quality );
  130. int32_t getMaxScoreFromNumWords ( int32_t n0, int32_t n1, int32_t quality );
  131. // 2 new maps for boosting base quality from link statistics
  132. int32_t getQualityBoostFromNumLinks ( int32_t numLinks );
  133. int32_t getQualityBoostFromLinkQualitySum ( int32_t linkBaseQualitySum );
  134. // 2 new maps for maxScore/scoreWeight of outgoing linkText
  135. int32_t getLinkTextScoreWeightFromLinkerQuality ( int32_t quality );
  136. int32_t getLinkTextScoreWeightFromLinkeeQuality ( int32_t quality );
  137. int32_t getLinkTextMaxScoreFromQuality ( int32_t quality );
  138. int32_t getLinkTextScoreWeightFromNumWords( int32_t numWords );
  139. // . another new map for boosting quality from the link-adjusted
  140. // quality of our root page
  141. // . root page is just our site url (i.e. http://about.com/)
  142. // . "rootQuality" is link-adjusted
  143. int32_t getQualityBoostFromRootQuality ( int32_t rootQuality ) ;
  144. int32_t getQuotaBoostFromRootQuality ( int32_t rootQuality ) ;
  145. int32_t getQuotaBoostFromQuality ( int32_t quality ) ;
  146. // if X% of the words are spammed, consider ALL the words to be spammed
  147. int32_t getMaxPercentForSpamFromQuality ( int32_t quality ) ;
  148. //private:
  149. // . parses and accesses a map/graph in the xml for us
  150. // . returns default "def" if map not present or x's in map unordered
  151. int32_t getY (int32_t n0,int32_t n1,int32_t X,char *strx,char *stry,int32_t def) ;
  152. */
  153. // these reference into m_data??? (NOTE(review): original author was unsure -- confirm in the implementation)
  154. Url m_site;
  155. //char m_coll[64];
  156. //int32_t m_collLen;
  157. // filenum determines the xml uniquely
  158. int32_t m_filenum;
  159. // did this rec have its own entry in tagdb?
  160. bool m_hadRec;
  161. // did we get it by ip? (if not, we got it by canonical domain name)
  162. bool m_gotByIp;
  163. /*
  164. // . the xml describing this site
  165. // . references into an Xml stored in Sitedb class
  166. Xml *m_xml;
  167. */
  168. // a buffer for holding the little site record itself (returned by getData())
  169. char m_data[CATREC_BUF_SIZE];
  170. int32_t m_dataSize;
  171. // category ID info for catdb
  172. unsigned char m_numCatids;
  173. int32_t *m_catids; // NOTE(review): pointer target/ownership not visible in this header -- confirm
  174. int32_t m_numIndCatids;
  175. int32_t m_indCatids[MAX_IND_CATIDS]; // presumably filled by setIndirectCatids() -- confirm
  176. // version
  177. unsigned char m_version;
  178. /*
  179. unsigned char m_spamBits;
  180. unsigned char m_adultLevel;
  181. char m_siteQuality;
  182. uint8_t m_numTypes;
  183. uint8_t m_numLangs;
  184. SiteType m_siteTypes[MAX_SITE_TYPES];
  185. SiteType m_siteLangs[MAX_SITE_TYPES];
  186. */
  187. // url pointer and its length
  188. char *m_url;
  189. int32_t m_urlLen;
  190. /*
  191. // time stamp, comment, username
  192. int32_t m_timeStamp;
  193. char *m_comment;
  194. char *m_username;
  195. // hack for addSiteType()
  196. int32_t *m_incHere;
  197. char *m_addHere ;
  198. // hack for changeFilenum()
  199. char *m_filenumPtr;
  200. */
  201. };
  202. #endif
  203. // format of a template or default record in xml:
  204. // ## NOTE: the key of the record is the sitename prefixed with the collection:
  205. // ## NOTE: "collectionName:" is prefixed to all hashed terms before hashing
  206. // ## LATER: do permission system
  207. // ## all indexed terms will be preceded by "collection:" when indexed so you
  208. // ## can do a search within that collection.
  209. // <comment> %s </>
  210. // ## <addedDate> %s </> (stored as a int32_t)
  211. // <allowMimeType> %s </> (text, html?)
  212. // <allowExtension> %s </> (used iff allowAllExtensions is false)
  213. // ## the base quality of all docs from this site
  214. // <baseQuality> %c </> (0-100%,default 30,qual of docs in site)
  215. // ## the computed link-adjusted quality should not exceed this
  216. // <maxQuality> %c </> (0-100%, def 100)
  217. // ## should we treat incoming link text as if it were on our page?
  218. // ## score weights and maxes for the link text is determined by the linker's
  219. // ## own link-adjusted quality. (see graphs/maps below)
  220. // <indexIncomingLinkText> %b </> (0-100, default = 100, a %)
  221. // ## do links from this site always point to clean pages?
  222. // <linksClean> %b </> (default no)
  223. // ## a doc w/ link-adjusted quality LESS THAN this will not be indexed
  224. // <minQualityToIndex> %c </> (default 0% )
  225. // ## a doc w/ link-adjusted quality at or below this will be checked for
  226. // ## adult content.
  227. // <maxQualityForAdultDetect> %c </> (default 0%, 0 means none)
  228. // ## how often do we re-spider it?
  229. // ## we try to compute the best spider rate based on last modified times
  230. // <minSpiderFrequency> %i </> (default 60*60*24*30=1month, in seconds)
  231. // <maxSpiderFrequency> %i </> (default 60*60*24*30=1month, in seconds)
  232. // <spiderLinks> %b </> (default true)
  233. // <spiderLinkPriority> %"INT32" </> (0-7, default -1) -1 means prntPriorty-1
  234. // <spiderMaxPriority> %"INT32" </> (0-7, default 7)
  235. // ## these are fairly self-explanatory
  236. // <maxUrlLen> %i </> (default 0, 0 means none)
  237. // <minMetaRefresh> %i </> (default 6 )
  238. // <isBanned> %b </> (default no )
  239. // <isAdult> %b </> (default no )
  240. // <isISP> %b </> (default no )
  241. // <isTrusted> %b </> (default no )
  242. // <allowAdultContent> %b </> (default yes)
  243. // <allowCgiUrls> %b </> (default yes)
  244. // <allowIpUrls> %b </> (default yes)
  245. // <allowAllExtensions> %b </> (default yes)
  246. // <allowNonAsciiDocs> %b </> (default yes)
  247. // <delete404s> %b </> (default yes) from cache/titledb
  248. // <indexDupContent> %b </> (default yes)
  249. // <indexSite> %b </> (default yes) site: terms
  250. // <indexSubSite> %b </> (default yes) subsite: terms
  251. // <indexUrl> %b </> (default yes) url: terms
  252. // <indexSubUrl> %b </> (default yes) suburl: terms
  253. // <indexIp> %b </> (default yes) ip: terms
  254. // <indexLinks> %b </> (default yes) link:/href: terms
  255. // <maxDocs> %ul </> (default -1 = no max)
  256. // ## we don't have a security system... yet...
  257. // ## TODO: <maxCacheSpace> %ul </> (default 1024*1024)
  258. // ## TODO: <directorMaxScore> %s </> (256bit seal for maxScore tag above)
  259. // ## Now for some maps/graphs.
  260. // ## we list the 5 X components followed by the 5 Y components.
  261. // ## all maps/graphs linearly interpolate between the points.
  262. // ## the edge pieces are horizontal.
  263. // ## these maps can have up to 32 points but i typically just use 5.
  264. // ## we map the NUMBER of incoming links to a baseQuality BOOST for our doc.
  265. // ## the resulting new quality is the link-adjusted quality of the linkee doc.
  266. // ## These boosts are ADDED to the existing quality.
  267. // <numLinks11> %i </> (default 0 )
  268. // <numLinks12> %i </> (default 5 )
  269. // <numLinks13> %i </> (default 10 )
  270. // <numLinks14> %i </> (default 20 )
  271. // <numLinks15> %i </> (default 50 )
  272. // <qualityBoost11> %i </> (default 0% )
  273. // <qualityBoost12> %i </> (default 5% )
  274. // <qualityBoost13> %i </> (default 10% )
  275. // <qualityBoost14> %i </> (default 15% )
  276. // <qualityBoost15> %i </> (default 20% )
  277. // ## we map the SUM of the baseQuality of all linkers to a baseQuality BOOST.
  278. // ## the resulting new quality is the link-adjusted quality of the linkee doc.
  279. // ## we only add up BASE quality of the linkers.
  280. // ## we only add up 1 linker's BASE quality per site.
  281. // ## These boosts are ADDED to the existing quality.
  282. // <linkQualitySum21> %i </> (default 0 )
  283. // <linkQualitySum22> %i </> (default 50 )
  284. // <linkQualitySum23> %i </> (default 100 )
  285. // <linkQualitySum24> %i </> (default 150 )
  286. // <linkQualitySum25> %i </> (default 200 )
  287. // <qualityBoost21> %i </> (default 0% )
  288. // <qualityBoost22> %i </> (default 5% )
  289. // <qualityBoost23> %i </> (default 10% )
  290. // <qualityBoost24> %i </> (default 15% )
  291. // <qualityBoost25> %i </> (default 20% )
  292. // ## we map the LINK-ADJUSTED QUALITY of our root page (site url) to a
  293. // ## quality BOOST for us.
  294. // ## the site url is just our site, could be like http://about.com/
  295. // ## These boosts are ADDED to the existing quality.
  296. // <rootQuality31> %i </> (default 0 )
  297. // <rootQuality32> %i </> (default 50 )
  298. // <rootQuality33> %i </> (default 100 )
  299. // <rootQuality34> %i </> (default 200 )
  300. // <rootQuality35> %i </> (default 500 )
  301. // <qualityBoost31> %i </> (default 0% )
  302. // <qualityBoost32> %i </> (default 5% )
  303. // <qualityBoost33> %i </> (default 10% )
  304. // <qualityBoost34> %i </> (default 15% )
  305. // <qualityBoost35> %i </> (default 20% )
  306. // ## TODO: make based on quality of doc and length of link text!!
  307. // ## currently we limit link text to up to 256 chars in LinkInfo.cpp.
  308. // ## map doc's link-adjusted quality to scoreWeight of its outgoing link text
  309. // <quality41> %i </> (default 0% )
  310. // <quality42> %i </> (default 30% )
  311. // <quality43> %i </> (default 50% )
  312. // <quality44> %i </> (default 70% )
  313. // <quality45> %i </> (default 85% )
  314. // <linkTextScoreWeight41> %i </> (default 50% )
  315. // <linkTextScoreWeight42> %i </> (default 100% )
  316. // <linkTextScoreWeight43> %i </> (default 130% )
  317. // <linkTextScoreWeight44> %i </> (default 180% )
  318. // <linkTextScoreWeight45> %i </> (default 250% )
  319. // ## map doc's link-adjusted quality to maxScore of its outgoing link text.
  320. // ## maxScore applies to all docs from this site as to limit a site's impact.
  321. // <quality51> %i </> (default
  322. // <quality52> %i </>
  323. // <quality53> %i </>
  324. // <quality54> %i </>
  325. // <quality55> %i </>
  326. // <linkTextMaxScore51> %i </>
  327. // <linkTextMaxScore52> %i </>
  328. // <linkTextMaxScore53> %i </>
  329. // <linkTextMaxScore54> %i </>
  330. // <linkTextMaxScore55> %i </>
  331. // ## we map the LINK-ADJUSTED QUALITY of our ROOT page (site url) to a quota
  332. // ## boost. (can be negative)
  333. // ## the site url is just our site, could be like http://about.com/
  334. // ## These boosts are MULTIPLIED by the existing quota.
  335. // <rootQuality71> %i </> (default 0 )
  336. // <rootQuality72> %i </> (default 50 )
  337. // <rootQuality73> %i </> (default 100 )
  338. // <rootQuality74> %i </> (default 200 )
  339. // <rootQuality75> %i </> (default 500 )
  340. // <quotaBoost71> %i </> (default 0% )
  341. // <quotaBoost72> %i </> (default 0% )
  342. // <quotaBoost73> %i </> (default 0% )
  343. // <quotaBoost74> %i </> (default 0% )
  344. // <quotaBoost75> %i </> (default 0% )
  345. // ## we map the LINK-ADJUSTED QUALITY of our page (site url) to a quota
  346. // ## boost. (can be negative)
  347. // ## the site url is just our site, could be like http://about.com/
  348. // ## These boosts are MULTIPLIED by the existing quota.
  349. // <quality81> %i </> (default 0 )
  350. // <quality82> %i </> (default 50 )
  351. // <quality83> %i </> (default 100 )
  352. // <quality84> %i </> (default 200 )
  353. // <quality85> %i </> (default 500 )
  354. // <quotaBoost81> %i </> (default 0% )
  355. // <quotaBoost82> %i </> (default 0% )
  356. // <quotaBoost83> %i </> (default 0% )
  357. // <quotaBoost84> %i </> (default 0% )
  358. // <quotaBoost85> %i </> (default 0% )
  359. // ## the <index> node describes parsing/indexing rules
  360. // ## used for xhtml tags (title, meta summary/keywords/description)
  361. // ## NOTE: <score2> <weight2> defines a point on the #words-to-score function
  362. // ## NOTE: omit <name> to index whole body (excludes meta tags and xml tags)
  363. // ## NOTE: set <name> to "meta.summary" for indexing meta tag summary
  364. // ## NOTE: set <name> to "meta.keywords" for indexing meta tag keywords
  365. // ## NOTE: set <name> to "meta.description" for indexing meta tag description
  366. // ## NOTE: set <name> to "Xml" for indexing ALL xml tags
  367. // ## NOTE: set <name> to ??? for indexing text under that tag <???>...</>
  368. // <index>
  369. // <name> %s </> ("title","meta.summary","Xml","W")
  370. // <indexAsName> %s </> (for mapping pure xml tags)
  371. // <prefix> %s </> (like "title", "myTag:" -can omit)
  372. // <maxQualityForSpamDetect> %c </> (default 0, 0 means none)
  373. // <minQualityToIndex> %ul </> (0-255, default 0 ) do not index
  374. // <minDepth> %ul </> (0-inf, default 0 )
  375. // <maxDepth> %ul </> (0-inf, default inf)
  376. // <maxLenToIndex> %ul </> (0-inf, default inf)
  377. // <indexAllOccurences> %b </> (default no) (ex.: no for title)
  378. // <indexCRC> %b </> (default no ) index checksum?
  379. // <filterHtmlEntities> %b </> (default yes)
  380. // <indexIfUniqueOnly> %b </> (default no ) hash word iff unique
  381. // <indexSingletons> %b </> (default yes)
  382. // <indexPhrases> %b </> (default yes)
  383. // <indexAsWhole> %b </> (default no ) hash a checksum
  384. // <useStopWords> %b </> (default yes)
  385. // <useStems> %b </> (default yes)
  386. //
  387. // ## Map doc's (link-adjusted) quality to a maxLen for this field.
  388. // ## 30% quality is probably average.
  389. // ## NOTE: there really are no defaults for these, use tagdb default rec.
  390. // <quality11> %c </> (default 15% )
  391. // <quality12> %c </> (default 30% )
  392. // <quality13> %c </> (default 45% )
  393. // <quality14> %c </> (default 60% )
  394. // <quality15> %c </> (default 80% )
  395. // <maxLen11> %ul </> (default 80k )
  396. // <maxLen12> %ul </> (default 100k)
  397. // <maxLen13> %ul </> (default 150k)
  398. // <maxLen14> %ul </> (default 200k)
  399. // <maxLen15> %ul </> (default 250k)
  400. //
  401. // ## Map doc's (link-adjusted) quality to a maxScore for this field.
  402. // <quality21> %c </> (default 15% )
  403. // <quality22> %c </> (default 30% )
  404. // <quality23> %c </> (default 45% )
  405. // <quality24> %c </> (default 60% )
  406. // <quality25> %c </> (default 80% )
  407. // <maxScore21> %ul </> (default 30% )
  408. // <maxScore22> %ul </> (default 45% )
  409. // <maxScore23> %ul </> (default 60% )
  410. // <maxScore24> %ul </> (default 80% )
  411. // <maxScore25> %ul </> (default 100%)
  412. //
  413. // ## map doc (link-adjusted) quality to a scoreWeight for this field
  414. // <quality31> %c </> (default 15% )
  415. // <quality32> %c </> (default 30% )
  416. // <quality33> %c </> (default 45% )
  417. // <quality34> %c </> (default 60% )
  418. // <quality35> %c </> (default 80% )
  419. // <scoreWeight31> %ul </> (default 60% )
  420. // <scoreWeight32> %ul </> (default 100%)
  421. // <scoreWeight33> %ul </> (default 150%)
  422. // <scoreWeight34> %ul </> (default 200%)
  423. // <scoreWeight35> %ul </> (default 250%)
  424. //
  425. // ## map field length to a scoreWeight for this field
  426. // <len41> %ul </> (default 100) #w<100 -->wght=300
  427. // <len42> %ul </> (default 500) score in[200,300]
  428. // <len43> %ul </> (default 1000)
  429. // <len44> %ul </> (default 2000)
  430. // <len45> %ul </> (default 5000) if under/over 5000
  431. // <scoreWeight41> %ul </> (default 300%)
  432. // <scoreWeight42> %ul </> (default 200%)
  433. // <scoreWeight43> %ul </> (default 150%)
  434. // <scoreWeight44> %ul </> (default 100%)
  435. // <scoreWeight45> %ul </> (default 50%)
  436. //
  437. // ## map field length to a maxScore for this field
  438. // <len51> %ul </> (default 100) #w<100 -->wght=300
  439. // <len52> %ul </> (default 500) score in[200,300]
  440. // <len53> %ul </> (default 1000)
  441. // <len54> %ul </> (default 2000)
  442. // <len55> %ul </> (default 5000) if under/over 5000
  443. // <maxScore51> %ul </> (default 30% )
  444. // <maxScore52> %ul </> (default 45% )
  445. // <maxScore53> %ul </> (default 60% )
  446. // <maxScore54> %ul </> (default 80% )
  447. // <maxScore55> %ul </> (default 100%)
  448. //
  449. // </>
  450. // TODO:
  451. // <indexAsLong>, <indexAsBool>, ... for pure xml tags w/ special meaning
  452. //