PageRenderTime 52ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/src/tests/add-ons/kernel/file_systems/bfs/r5/Query.cpp

https://bitbucket.org/ddevine/haiku
C++ | 1578 lines | 1104 code | 291 blank | 183 comment | 412 complexity | 8ec0ecc0169e4767ccffdb6c9d3ea6ca MD5 | raw file
Possible License(s): Apache-2.0, LGPL-2.0, LGPL-2.1, MPL-2.0-no-copyleft-exception, MIT, ISC, BSD-3-Clause, AGPL-1.0, GPL-2.0, GPL-3.0, LGPL-3.0
  1. /* Query - query parsing and evaluation
  2. *
  3. * The pattern matching is roughly based on code originally written
  4. * by J. Kercheval, and on code written by Kenneth Almquist, though
  5. * it shares no code.
  6. *
  7. * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de.
  8. * This file may be used under the terms of the MIT License.
  9. */
  10. #include "fsproto.h" // include first for hacky reasons
  11. #include "Query.h"
  12. #include "bfs.h"
  13. #include "Debug.h"
  14. #include "Stack.h"
  15. #include "Volume.h"
  16. #include "Inode.h"
  17. #include "BPlusTree.h"
  18. #include "Index.h"
  19. #include <util/kernel_cpp.h>
  20. #include <SupportDefs.h>
  21. #include <NodeMonitor.h>
  22. #include <TypeConstants.h>
  23. #include <AppDefs.h>
  24. #include <fs_query.h>
  25. #include <malloc.h>
  26. #include <stdio.h>
  27. #include <string.h>
  28. // The parser has a very static design, but it will do what is required.
  29. //
// ParseOr(), ParseAnd(), ParseEquation() guarantee the operator
  31. // precedence, that is =,!=,>,<,>=,<= .. && .. ||.
  32. // Apparently, the "!" (not) can only be used with brackets.
  33. //
  34. // If you think that there are too few NULL pointer checks in some places
  35. // of the code, just read the beginning of the query constructor.
  36. // The API is not fully available, just the Query and the Expression class
  37. // are.
  38. enum ops {
  39. OP_NONE,
  40. OP_AND,
  41. OP_OR,
  42. OP_EQUATION,
  43. OP_EQUAL,
  44. OP_UNEQUAL,
  45. OP_GREATER_THAN,
  46. OP_LESS_THAN,
  47. OP_GREATER_THAN_OR_EQUAL,
  48. OP_LESS_THAN_OR_EQUAL,
  49. };
  50. enum match {
  51. NO_MATCH = 0,
  52. MATCH_OK = 1,
  53. MATCH_BAD_PATTERN = -2,
  54. MATCH_INVALID_CHARACTER
  55. };
  56. // return values from isValidPattern()
  57. enum {
  58. PATTERN_INVALID_ESCAPE = -3,
  59. PATTERN_INVALID_RANGE,
  60. PATTERN_INVALID_SET
  61. };
// Storage for a single query or index value. Which member is meaningful
// depends on the type the value was converted to (Equation::ConvertValue()
// fills the member that belongs to the requested type_code). The string
// member bounds the size of the whole union to INODE_FILE_NAME_LENGTH bytes
// or the largest numeric member, whichever is bigger.
union value {
	int64	Int64;
	uint64	Uint64;
	int32	Int32;
	uint32	Uint32;
	float	Float;
	double	Double;
	char	String[INODE_FILE_NAME_LENGTH];
};
// B_MIME_STRING_TYPE is defined in storage/Mime.h, but we
// don't need the whole file here; the type can't change anyway.
// The fallback is only defined when the _MIME_H include guard is not set,
// i.e. when Mime.h has not been included before this point.
#ifndef _MIME_H
# define B_MIME_STRING_TYPE 'MIMS'
#endif
// Abstract base class of the nodes of a parsed query expression. A term
// stores its operation code (one of the "ops" constants) and a pointer to
// its parent term; Equation and Operator derive from it.
class Term {
	public:
		// a new term has no parent until SetParent() is called
		Term(int8 op) : fOp(op), fParent(NULL) {}
		virtual ~Term() {}

		int8		Op() const { return fOp; }

		void		SetParent(Term *parent) { fParent = parent; }
		Term		*Parent() const { return fParent; }

		// Matches the inode against this term; the optional attribute/type/
		// key/size arguments carry a value supplied by the caller instead of
		// one read from the inode (see Equation::Match()).
		virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0,
			const uint8 *key = NULL,size_t size = 0) = 0;
		// turns the term into its logical complement
		virtual void Complement() = 0;

		virtual void CalculateScore(Index &index) = 0;
		virtual int32 Score() const = 0;

		virtual status_t InitCheck() = 0;

#ifdef DEBUG
		virtual void PrintToStream() = 0;
#endif

	protected:
		int8	fOp;		// operation code, see enum ops
		Term	*fParent;	// NULL for a term without parent
};
// Although an Equation object is quite independent from the volume on which
// the query is run, there are some dependencies that are produced while
// querying:
// The type/size of the value, the score, and if it has an index or not.
// So you could run more than one query on the same volume, but it might return
// wrong values when it runs concurrently on another volume.
// That's not an issue right now, because we run single-threaded and don't use
// queries more than once.
//
// An Equation is a leaf of the expression tree: it holds one
// "attribute <op> value" comparison that the constructor parses from the
// query string.
class Equation : public Term {
	public:
		// parses one equation starting at *expr and advances *expr;
		// use InitCheck() to find out whether parsing succeeded
		Equation(char **expr);
		virtual ~Equation();

		virtual status_t InitCheck();

		status_t	ParseQuotedString(char **_start, char **_end);
		char		*CopyString(char *start, char *end);

		virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0,
			const uint8 *key = NULL, size_t size = 0);
		virtual void Complement();

		status_t	PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator,
			bool queryNonIndexed);
		status_t	GetNextMatching(Volume *volume, TreeIterator *iterator,
			struct dirent *dirent, size_t bufferSize);

		virtual void CalculateScore(Index &index);
		virtual int32 Score() const { return fScore; }

#ifdef DEBUG
		virtual void PrintToStream();
#endif

	private:
		Equation(const Equation &);
		Equation &operator=(const Equation &);
			// no implementation

		status_t	ConvertValue(type_code type);
		bool		CompareTo(const uint8 *value, uint16 size);
		uint8		*Value() const { return (uint8 *)&fValue; }
		status_t	MatchEmptyString();

		char		*fAttribute;	// attribute name, allocated by CopyString()
		char		*fString;		// value as written in the query, allocated
									// by CopyString()
		union value	fValue;			// fString converted to fType
		type_code	fType;			// type of fValue, 0 before any conversion
		size_t		fSize;			// size of fValue, set by ConvertValue()
		bool		fIsPattern;		// fString contains '*', '?' or '['
		bool		fIsSpecialTime;	// attribute is "last_modified"

		int32		fScore;			// set by CalculateScore()
		bool		fHasIndex;		// set by PrepareQuery()
};
// Inner node of the expression tree: combines a left and a right term with
// OP_AND or OP_OR. The operator deletes both children in its destructor.
class Operator : public Term {
	public:
		// registers itself as parent of the (non-NULL) children
		Operator(Term *,int8,Term *);
		virtual ~Operator();

		Term		*Left() const { return fLeft; }
		Term		*Right() const { return fRight; }

		virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0,
			const uint8 *key = NULL, size_t size = 0);
		virtual void Complement();

		virtual void CalculateScore(Index &index);
		virtual int32 Score() const;

		virtual status_t InitCheck();

		//Term *Copy() const;
#ifdef DEBUG
		virtual void PrintToStream();
#endif

	private:
		Operator(const Operator &);
		Operator &operator=(const Operator &);
			// no implementation

		Term		*fLeft,*fRight;
};
  163. //---------------------------------
  164. void
  165. skipWhitespace(char **expr, int32 skip = 0)
  166. {
  167. char *string = (*expr) + skip;
  168. while (*string == ' ' || *string == '\t') string++;
  169. *expr = string;
  170. }
  171. void
  172. skipWhitespaceReverse(char **expr,char *stop)
  173. {
  174. char *string = *expr;
  175. while (string > stop && (*string == ' ' || *string == '\t')) string--;
  176. *expr = string;
  177. }
  178. // #pragma mark -
  179. uint32
  180. utf8ToUnicode(char **string)
  181. {
  182. uint8 *bytes = (uint8 *)*string;
  183. int32 length;
  184. uint8 mask = 0x1f;
  185. switch (bytes[0] & 0xf0) {
  186. case 0xc0:
  187. case 0xd0: length = 2; break;
  188. case 0xe0: length = 3; break;
  189. case 0xf0:
  190. mask = 0x0f;
  191. length = 4;
  192. break;
  193. default:
  194. // valid 1-byte character
  195. // and invalid characters
  196. (*string)++;
  197. return bytes[0];
  198. }
  199. uint32 c = bytes[0] & mask;
  200. int32 i = 1;
  201. for (;i < length && (bytes[i] & 0x80) > 0;i++)
  202. c = (c << 6) | (bytes[i] & 0x3f);
  203. if (i < length) {
  204. // invalid character
  205. (*string)++;
  206. return (uint32)bytes[0];
  207. }
  208. *string += length;
  209. return c;
  210. }
  211. int32
  212. getFirstPatternSymbol(char *string)
  213. {
  214. char c;
  215. for (int32 index = 0;(c = *string++);index++) {
  216. if (c == '*' || c == '?' || c == '[')
  217. return index;
  218. }
  219. return -1;
  220. }
  221. bool
  222. isPattern(char *string)
  223. {
  224. return getFirstPatternSymbol(string) >= 0 ? true : false;
  225. }
  226. status_t
  227. isValidPattern(char *pattern)
  228. {
  229. while (*pattern) {
  230. switch (*pattern++) {
  231. case '\\':
  232. // the escape character must not be at the end of the pattern
  233. if (!*pattern++)
  234. return PATTERN_INVALID_ESCAPE;
  235. break;
  236. case '[':
  237. if (pattern[0] == ']' || !pattern[0])
  238. return PATTERN_INVALID_SET;
  239. while (*pattern != ']') {
  240. if (*pattern == '\\' && !*++pattern)
  241. return PATTERN_INVALID_ESCAPE;
  242. if (!*pattern)
  243. return PATTERN_INVALID_SET;
  244. if (pattern[0] == '-' && pattern[1] == '-')
  245. return PATTERN_INVALID_RANGE;
  246. pattern++;
  247. }
  248. break;
  249. }
  250. }
  251. return B_OK;
  252. }
  253. /** Matches the string against the given wildcard pattern.
  254. * Returns either MATCH_OK, or NO_MATCH when everything went fine,
  255. * or values < 0 (see enum at the top of Query.cpp) if an error
  256. * occurs
  257. */
  258. status_t
  259. matchString(char *pattern, char *string)
  260. {
  261. while (*pattern) {
  262. // end of string == valid end of pattern?
  263. if (!string[0]) {
  264. while (pattern[0] == '*')
  265. pattern++;
  266. return !pattern[0] ? MATCH_OK : NO_MATCH;
  267. }
  268. switch (*pattern++) {
  269. case '?':
  270. {
  271. // match exactly one UTF-8 character; we are
  272. // not interested in the result
  273. utf8ToUnicode(&string);
  274. break;
  275. }
  276. case '*':
  277. {
  278. // compact pattern
  279. while (true) {
  280. if (pattern[0] == '?') {
  281. if (!*++string)
  282. return NO_MATCH;
  283. } else if (pattern[0] != '*')
  284. break;
  285. pattern++;
  286. }
  287. // if the pattern is done, we have matched the string
  288. if (!pattern[0])
  289. return MATCH_OK;
  290. while(true) {
  291. // we have removed all occurences of '*' and '?'
  292. if (pattern[0] == string[0]
  293. || pattern[0] == '['
  294. || pattern[0] == '\\') {
  295. status_t status = matchString(pattern,string);
  296. if (status < B_OK || status == MATCH_OK)
  297. return status;
  298. }
  299. // we could be nice here and just jump to the next
  300. // UTF-8 character - but we wouldn't gain that much
  301. // and it'd be slower (since we're checking for
  302. // equality before entering the recursion)
  303. if (!*++string)
  304. return NO_MATCH;
  305. }
  306. break;
  307. }
  308. case '[':
  309. {
  310. bool invert = false;
  311. if (pattern[0] == '^' || pattern[0] == '!') {
  312. invert = true;
  313. pattern++;
  314. }
  315. if (!pattern[0] || pattern[0] == ']')
  316. return MATCH_BAD_PATTERN;
  317. uint32 c = utf8ToUnicode(&string);
  318. bool matched = false;
  319. while (pattern[0] != ']') {
  320. if (!pattern[0])
  321. return MATCH_BAD_PATTERN;
  322. if (pattern[0] == '\\')
  323. pattern++;
  324. uint32 first = utf8ToUnicode(&pattern);
  325. // Does this character match, or is this a range?
  326. if (first == c) {
  327. matched = true;
  328. break;
  329. } else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) {
  330. pattern++;
  331. if (pattern[0] == '\\') {
  332. pattern++;
  333. if (!pattern[0])
  334. return MATCH_BAD_PATTERN;
  335. }
  336. uint32 last = utf8ToUnicode(&pattern);
  337. if (c >= first && c <= last) {
  338. matched = true;
  339. break;
  340. }
  341. }
  342. }
  343. if (invert)
  344. matched = !matched;
  345. if (matched) {
  346. while (pattern[0] != ']') {
  347. if (!pattern[0])
  348. return MATCH_BAD_PATTERN;
  349. pattern++;
  350. }
  351. pattern++;
  352. break;
  353. }
  354. return NO_MATCH;
  355. }
  356. case '\\':
  357. if (!pattern[0])
  358. return MATCH_BAD_PATTERN;
  359. // supposed to fall through
  360. default:
  361. if (pattern[-1] != string[0])
  362. return NO_MATCH;
  363. string++;
  364. break;
  365. }
  366. }
  367. if (string[0])
  368. return NO_MATCH;
  369. return MATCH_OK;
  370. }
  371. // #pragma mark -
  372. Equation::Equation(char **expr)
  373. : Term(OP_EQUATION),
  374. fAttribute(NULL),
  375. fString(NULL),
  376. fType(0),
  377. fIsPattern(false)
  378. {
  379. char *string = *expr;
  380. char *start = string;
  381. char *end = NULL;
  382. // Since the equation is the integral part of any query, we're just parsing
  383. // the whole thing here.
  384. // The whitespace at the start is already removed in Expression::ParseEquation()
  385. if (*start == '"' || *start == '\'') {
  386. // string is quoted (start has to be on the beginning of a string)
  387. if (ParseQuotedString(&start, &end) < B_OK)
  388. return;
  389. // set string to a valid start of the equation symbol
  390. string = end + 2;
  391. skipWhitespace(&string);
  392. if (*string != '=' && *string != '<' && *string != '>' && *string != '!') {
  393. *expr = string;
  394. return;
  395. }
  396. } else {
  397. // search the (in)equation for the actual equation symbol (and for other operators
  398. // in case the equation is malformed)
  399. while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!'
  400. && *string != '&' && *string != '|')
  401. string++;
  402. // get the attribute string (and trim whitespace), in case
  403. // the string was not quoted
  404. end = string - 1;
  405. skipWhitespaceReverse(&end, start);
  406. }
  407. // attribute string is empty (which is not allowed)
  408. if (start > end)
  409. return;
  410. // at this point, "start" points to the beginning of the string, "end" points
  411. // to the last character of the string, and "string" points to the first
  412. // character of the equation symbol
  413. // test for the right symbol (as this doesn't need any memory)
  414. switch (*string) {
  415. case '=':
  416. fOp = OP_EQUAL;
  417. break;
  418. case '>':
  419. fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN;
  420. break;
  421. case '<':
  422. fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN;
  423. break;
  424. case '!':
  425. if (*(string + 1) != '=')
  426. return;
  427. fOp = OP_UNEQUAL;
  428. break;
  429. // any invalid characters will be rejected
  430. default:
  431. *expr = string;
  432. return;
  433. }
  434. // lets change "start" to point to the first character after the symbol
  435. if (*(string + 1) == '=')
  436. string++;
  437. string++;
  438. skipWhitespace(&string);
  439. // allocate & copy the attribute string
  440. fAttribute = CopyString(start, end);
  441. if (fAttribute == NULL)
  442. return;
  443. start = string;
  444. if (*start == '"' || *start == '\'') {
  445. // string is quoted (start has to be on the beginning of a string)
  446. if (ParseQuotedString(&start, &end) < B_OK)
  447. return;
  448. string = end + 2;
  449. skipWhitespace(&string);
  450. } else {
  451. while (*string && *string != '&' && *string != '|' && *string != ')')
  452. string++;
  453. end = string - 1;
  454. skipWhitespaceReverse(&end, start);
  455. }
  456. // at this point, "start" will point to the first character of the value,
  457. // "end" will point to its last character, and "start" to the first non-
  458. // whitespace character after the value string
  459. fString = CopyString(start, end);
  460. if (fString == NULL)
  461. return;
  462. // patterns are only allowed for these operations (and strings)
  463. if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) {
  464. fIsPattern = isPattern(fString);
  465. if (fIsPattern && isValidPattern(fString) < B_OK) {
  466. // we only want to have valid patterns; setting fString
  467. // to NULL will cause InitCheck() to fail
  468. free(fString);
  469. fString = NULL;
  470. }
  471. }
  472. // The special time flag is set if the time values are shifted
  473. // 64-bit values to reduce the number of duplicates.
  474. // We have to be able to compare them against unshifted values
  475. // later. The only index which needs this is the last_modified
  476. // index, but we may want to open that feature for other indices,
  477. // too one day.
  478. fIsSpecialTime = !strcmp(fAttribute, "last_modified");
  479. *expr = string;
  480. }
  481. Equation::~Equation()
  482. {
  483. if (fAttribute != NULL)
  484. free(fAttribute);
  485. if (fString != NULL)
  486. free(fString);
  487. }
  488. status_t
  489. Equation::InitCheck()
  490. {
  491. if (fAttribute == NULL
  492. || fString == NULL
  493. || fOp == OP_NONE)
  494. return B_BAD_VALUE;
  495. return B_OK;
  496. }
  497. status_t
  498. Equation::ParseQuotedString(char **_start, char **_end)
  499. {
  500. char *start = *_start;
  501. char quote = *start++;
  502. char *end = start;
  503. for (;*end && *end != quote;end++) {
  504. if (*end == '\\')
  505. end++;
  506. }
  507. if (*end == '\0')
  508. return B_BAD_VALUE;
  509. *_start = start;
  510. *_end = end - 1;
  511. return B_OK;
  512. }
  513. char *
  514. Equation::CopyString(char *start, char *end)
  515. {
  516. // end points to the last character of the string - and the length
  517. // also has to include the null-termination
  518. int32 length = end + 2 - start;
  519. // just to make sure; since that's the max. attribute name length and
  520. // the max. string in an index, it make sense to have it that way
  521. if (length > INODE_FILE_NAME_LENGTH || length <= 0)
  522. return NULL;
  523. char *copy = (char *)malloc(length);
  524. if (copy == NULL)
  525. return NULL;
  526. memcpy(copy,start,length - 1);
  527. copy[length - 1] = '\0';
  528. return copy;
  529. }
  530. status_t
  531. Equation::ConvertValue(type_code type)
  532. {
  533. // Has the type already been converted?
  534. if (type == fType)
  535. return B_OK;
  536. char *string = fString;
  537. switch (type) {
  538. case B_MIME_STRING_TYPE:
  539. type = B_STRING_TYPE;
  540. // supposed to fall through
  541. case B_STRING_TYPE:
  542. strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH);
  543. fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0';
  544. fSize = strlen(fValue.String);
  545. break;
  546. case B_INT32_TYPE:
  547. fValue.Int32 = strtol(string, &string, 0);
  548. fSize = sizeof(int32);
  549. break;
  550. case B_UINT32_TYPE:
  551. fValue.Int32 = strtoul(string, &string, 0);
  552. fSize = sizeof(uint32);
  553. break;
  554. case B_INT64_TYPE:
  555. fValue.Int64 = strtoll(string, &string, 0);
  556. fSize = sizeof(int64);
  557. break;
  558. case B_UINT64_TYPE:
  559. fValue.Uint64 = strtoull(string, &string, 0);
  560. fSize = sizeof(uint64);
  561. break;
  562. case B_FLOAT_TYPE:
  563. fValue.Float = strtod(string, &string);
  564. fSize = sizeof(float);
  565. break;
  566. case B_DOUBLE_TYPE:
  567. fValue.Double = strtod(string, &string);
  568. fSize = sizeof(double);
  569. break;
  570. default:
  571. FATAL(("query value conversion to 0x%lx requested!\n", type));
  572. // should we fail here or just do a safety int32 conversion?
  573. return B_ERROR;
  574. }
  575. fType = type;
  576. // patterns are only allowed for string types
  577. if (fType != B_STRING_TYPE && fIsPattern)
  578. fIsPattern = false;
  579. return B_OK;
  580. }
  581. /** Returns true when the key matches the equation. You have to
  582. * call ConvertValue() before this one.
  583. */
  584. bool
  585. Equation::CompareTo(const uint8 *value, uint16 size)
  586. {
  587. int32 compare;
  588. // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL
  589. if (fIsPattern) {
  590. // we have already validated the pattern, so we don't check for failing
  591. // here - if something is broken, and matchString() returns an error,
  592. // we just don't match
  593. compare = matchString(fValue.String, (char *)value) == MATCH_OK ? 0 : 1;
  594. } else if (fIsSpecialTime) {
  595. // the index is a shifted int64 index, but we have to match
  596. // against an unshifted value (i.e. the last_modified index)
  597. int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT;
  598. compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, sizeof(int64));
  599. } else
  600. compare = compareKeys(fType, value, size, Value(), fSize);
  601. switch (fOp) {
  602. case OP_EQUAL:
  603. return compare == 0;
  604. case OP_UNEQUAL:
  605. return compare != 0;
  606. case OP_LESS_THAN:
  607. return compare < 0;
  608. case OP_LESS_THAN_OR_EQUAL:
  609. return compare <= 0;
  610. case OP_GREATER_THAN:
  611. return compare > 0;
  612. case OP_GREATER_THAN_OR_EQUAL:
  613. return compare >= 0;
  614. }
  615. FATAL(("Unknown/Unsupported operation: %d\n", fOp));
  616. return false;
  617. }
  618. void
  619. Equation::Complement()
  620. {
  621. D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) {
  622. FATAL(("op out of range!"));
  623. return;
  624. });
  625. int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL,
  626. OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN};
  627. fOp = complementOp[fOp - OP_EQUAL];
  628. }
  629. status_t
  630. Equation::MatchEmptyString()
  631. {
  632. // there is no matching attribute, we will just bail out if we
  633. // already know that our value is not of a string type.
  634. // If not, it will be converted to a string - and then be compared with "".
  635. // That's why we have to call ConvertValue() here - but it will be
  636. // a cheap call for the next time
  637. // Should we do this only for OP_UNEQUAL?
  638. if (fType != 0 && fType != B_STRING_TYPE)
  639. return NO_MATCH;
  640. status_t status = ConvertValue(B_STRING_TYPE);
  641. if (status == B_OK)
  642. status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH;
  643. return status;
  644. }
  645. /** Matches the inode's attribute value with the equation.
  646. * Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong
  647. */
  648. status_t
  649. Equation::Match(Inode *inode, const char *attributeName, int32 type, const uint8 *key, size_t size)
  650. {
  651. // get a pointer to the attribute in question
  652. union value value;
  653. uint8 *buffer;
  654. bool locked = false;
  655. // first, check if we are matching for a live query and use that value
  656. if (attributeName != NULL && !strcmp(fAttribute, attributeName)) {
  657. if (key == NULL) {
  658. if (type == B_STRING_TYPE)
  659. return MatchEmptyString();
  660. return NO_MATCH;
  661. }
  662. buffer = const_cast<uint8 *>(key);
  663. } else if (!strcmp(fAttribute, "name")) {
  664. // we need to lock before accessing Inode::Name()
  665. inode->SmallDataLock().Lock();
  666. locked = true;
  667. // if not, check for "fake" attributes, "name", "size", "last_modified",
  668. buffer = (uint8 *)inode->Name();
  669. if (buffer == NULL) {
  670. inode->SmallDataLock().Unlock();
  671. return B_ERROR;
  672. }
  673. type = B_STRING_TYPE;
  674. size = strlen((const char *)buffer);
  675. } else if (!strcmp(fAttribute,"size")) {
  676. buffer = (uint8 *)&inode->Node()->data.size;
  677. type = B_INT64_TYPE;
  678. } else if (!strcmp(fAttribute,"last_modified")) {
  679. buffer = (uint8 *)&inode->Node()->last_modified_time;
  680. type = B_INT64_TYPE;
  681. } else {
  682. // then for attributes in the small_data section, and finally for the
  683. // real attributes
  684. Inode *attribute;
  685. inode->SmallDataLock().Lock();
  686. small_data *smallData = inode->FindSmallData(fAttribute);
  687. if (smallData != NULL) {
  688. buffer = smallData->Data();
  689. type = smallData->type;
  690. size = smallData->data_size;
  691. locked = true;
  692. } else {
  693. // needed to unlock the small_data section as fast as possible
  694. inode->SmallDataLock().Unlock();
  695. if (inode->GetAttribute(fAttribute, &attribute) == B_OK) {
  696. buffer = (uint8 *)&value;
  697. type = attribute->Node()->type;
  698. size = attribute->Size();
  699. if (size > INODE_FILE_NAME_LENGTH)
  700. size = INODE_FILE_NAME_LENGTH;
  701. if (attribute->ReadAt(0, buffer, &size) < B_OK) {
  702. inode->ReleaseAttribute(attribute);
  703. return B_IO_ERROR;
  704. }
  705. inode->ReleaseAttribute(attribute);
  706. } else
  707. return MatchEmptyString();
  708. }
  709. }
  710. // prepare own value for use, if it is possible to convert it
  711. status_t status = ConvertValue(type);
  712. if (status == B_OK)
  713. status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH;
  714. if (locked)
  715. inode->SmallDataLock().Unlock();
  716. RETURN_ERROR(status);
  717. }
  718. void
  719. Equation::CalculateScore(Index &index)
  720. {
  721. // As always, these values could be tuned and refined.
  722. // And the code could also need some real world testing :-)
  723. // do we have to operate on a "foreign" index?
  724. if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) {
  725. fScore = 0;
  726. return;
  727. }
  728. // if we have a pattern, how much does it help our search?
  729. if (fIsPattern)
  730. fScore = getFirstPatternSymbol(fString) << 3;
  731. else {
  732. // Score by operator
  733. if (fOp == OP_EQUAL)
  734. // higher than pattern="255 chars+*"
  735. fScore = 2048;
  736. else
  737. // the pattern search is regarded cheaper when you have at
  738. // least one character to set your index to
  739. fScore = 5;
  740. }
  741. // take index size into account (1024 is the current node size
  742. // in our B+trees)
  743. // 2048 * 2048 == 4194304 is the maximum score (for an empty
  744. // tree, since the header + 1 node are already 2048 bytes)
  745. fScore = fScore * ((2048 * 1024LL) / index.Node()->Size());
  746. }
  747. status_t
  748. Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator, bool queryNonIndexed)
  749. {
  750. status_t status = index.SetTo(fAttribute);
  751. // if we should query attributes without an index, we can just proceed here
  752. if (status < B_OK && !queryNonIndexed)
  753. return B_ENTRY_NOT_FOUND;
  754. type_code type;
  755. // special case for OP_UNEQUAL - it will always operate through the whole index
  756. // but we need the call to the original index to get the correct type
  757. if (status < B_OK || fOp == OP_UNEQUAL) {
  758. // Try to get an index that holds all files (name)
  759. // Also sets the default type for all attributes without index
  760. // to string.
  761. type = status < B_OK ? B_STRING_TYPE : index.Type();
  762. if (index.SetTo("name") < B_OK)
  763. return B_ENTRY_NOT_FOUND;
  764. fHasIndex = false;
  765. } else {
  766. fHasIndex = true;
  767. type = index.Type();
  768. }
  769. if (ConvertValue(type) < B_OK)
  770. return B_BAD_VALUE;
  771. BPlusTree *tree;
  772. if (index.Node()->GetTree(&tree) < B_OK)
  773. return B_ERROR;
  774. *iterator = new TreeIterator(tree);
  775. if (*iterator == NULL)
  776. return B_NO_MEMORY;
  777. if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL
  778. || fIsPattern)
  779. && fHasIndex) {
  780. // set iterator to the exact position
  781. int32 keySize = index.KeySize();
  782. // at this point, fIsPattern is only true if it's a string type, and fOp
  783. // is either OP_EQUAL or OP_UNEQUAL
  784. if (fIsPattern) {
  785. // let's see if we can use the beginning of the key for positioning
  786. // the iterator and adjust the key size; if not, just leave the
  787. // iterator at the start and return success
  788. keySize = getFirstPatternSymbol(fString);
  789. if (keySize <= 0)
  790. return B_OK;
  791. }
  792. if (keySize == 0) {
  793. // B_STRING_TYPE doesn't have a fixed length, so it was set
  794. // to 0 before - we compute the correct value here
  795. if (fType == B_STRING_TYPE) {
  796. keySize = strlen(fValue.String);
  797. // The empty string is a special case - we normally don't check
  798. // for the trailing null byte, in the case for the empty string
  799. // we do it explicitly, because there can't be keys in the B+tree
  800. // with a length of zero
  801. if (keySize == 0)
  802. keySize = 1;
  803. } else
  804. RETURN_ERROR(B_ENTRY_NOT_FOUND);
  805. }
  806. if (fIsSpecialTime) {
  807. // we have to find the first matching shifted value
  808. off_t value = fValue.Int64 << INODE_TIME_SHIFT;
  809. status = (*iterator)->Find((uint8 *)&value, keySize);
  810. if (status == B_ENTRY_NOT_FOUND)
  811. return B_OK;
  812. } else {
  813. status = (*iterator)->Find(Value(), keySize);
  814. if (fOp == OP_EQUAL && !fIsPattern)
  815. return status;
  816. else if (status == B_ENTRY_NOT_FOUND
  817. && (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL))
  818. return B_OK;
  819. }
  820. RETURN_ERROR(status);
  821. }
  822. return B_OK;
  823. }
  824. status_t
  825. Equation::GetNextMatching(Volume *volume, TreeIterator *iterator,
  826. struct dirent *dirent, size_t bufferSize)
  827. {
  828. while (true) {
  829. union value indexValue;
  830. uint16 keyLength;
  831. uint16 duplicate;
  832. off_t offset;
  833. status_t status = iterator->GetNextEntry(&indexValue, &keyLength,
  834. (uint16)sizeof(indexValue), &offset, &duplicate);
  835. if (status < B_OK)
  836. return status;
  837. // only compare against the index entry when this is the correct
  838. // index for the equation
  839. if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue, keyLength)) {
  840. // They aren't equal? let the operation decide what to do
  841. // Since we always start at the beginning of the index (or the correct
  842. // position), only some needs to be stopped if the entry doesn't fit.
  843. if (fOp == OP_LESS_THAN
  844. || fOp == OP_LESS_THAN_OR_EQUAL
  845. || (fOp == OP_EQUAL && !fIsPattern))
  846. return B_ENTRY_NOT_FOUND;
  847. if (duplicate > 0)
  848. iterator->SkipDuplicates();
  849. continue;
  850. }
  851. Vnode vnode(volume, offset);
  852. Inode *inode;
  853. if ((status = vnode.Get(&inode)) != B_OK) {
  854. REPORT_ERROR(status);
  855. FATAL(("could not get inode %Ld in index \"%s\"!\n", offset, fAttribute));
  856. // try with next
  857. continue;
  858. }
  859. // ToDo: check user permissions here - but which one?!
  860. // we could filter out all those where we don't have
  861. // read access... (we should check for every parent
  862. // directory if the X_OK is allowed)
  863. // Although it's quite expensive to open all parents,
  864. // it's likely that the application that runs the
  865. // query will do something similar (and we don't have
  866. // to do it for root, either).
  867. // go up in the tree until a &&-operator is found, and check if the
  868. // inode matches with the rest of the expression - we don't have to
  869. // check ||-operators for that
  870. Term *term = this;
  871. status = MATCH_OK;
  872. if (!fHasIndex)
  873. status = Match(inode);
  874. while (term != NULL && status == MATCH_OK) {
  875. Operator *parent = (Operator *)term->Parent();
  876. if (parent == NULL)
  877. break;
  878. if (parent->Op() == OP_AND) {
  879. // choose the other child of the parent
  880. Term *other = parent->Right();
  881. if (other == term)
  882. other = parent->Left();
  883. if (other == NULL) {
  884. FATAL(("&&-operator has only one child... (parent = %p)\n", parent));
  885. break;
  886. }
  887. status = other->Match(inode);
  888. if (status < 0) {
  889. REPORT_ERROR(status);
  890. status = NO_MATCH;
  891. }
  892. }
  893. term = (Term *)parent;
  894. }
  895. if (status == MATCH_OK) {
  896. dirent->d_dev = volume->ID();
  897. dirent->d_ino = offset;
  898. dirent->d_pdev = volume->ID();
  899. dirent->d_pino = volume->ToVnode(inode->Parent());
  900. if (inode->GetName(dirent->d_name) < B_OK)
  901. FATAL(("inode %Ld in query has no name!\n", inode->BlockNumber()));
  902. #ifdef KEEP_WRONG_DIRENT_RECLEN
  903. // ToDo: The available file systems in BeOS apparently don't set the
  904. // correct d_reclen - we are copying that behaviour if requested, but
  905. // if it doesn't break compatibility, we will remove it.
  906. dirent->d_reclen = strlen(dirent->d_name);
  907. #else
  908. dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name);
  909. #endif
  910. }
  911. if (status == MATCH_OK)
  912. return B_OK;
  913. }
  914. RETURN_ERROR(B_ERROR);
  915. }
  916. // #pragma mark -
  917. Operator::Operator(Term *left, int8 op, Term *right)
  918. : Term(op),
  919. fLeft(left),
  920. fRight(right)
  921. {
  922. if (left)
  923. left->SetParent(this);
  924. if (right)
  925. right->SetParent(this);
  926. }
  927. Operator::~Operator()
  928. {
  929. delete fLeft;
  930. delete fRight;
  931. }
  932. status_t
  933. Operator::Match(Inode *inode, const char *attribute, int32 type, const uint8 *key, size_t size)
  934. {
  935. if (fOp == OP_AND) {
  936. status_t status = fLeft->Match(inode, attribute, type, key, size);
  937. if (status != MATCH_OK)
  938. return status;
  939. return fRight->Match(inode, attribute, type, key, size);
  940. } else {
  941. // choose the term with the better score for OP_OR
  942. if (fRight->Score() > fLeft->Score()) {
  943. status_t status = fRight->Match(inode, attribute, type, key, size);
  944. if (status != NO_MATCH)
  945. return status;
  946. }
  947. return fLeft->Match(inode, attribute, type, key, size);
  948. }
  949. }
  950. void
  951. Operator::Complement()
  952. {
  953. if (fOp == OP_AND)
  954. fOp = OP_OR;
  955. else
  956. fOp = OP_AND;
  957. fLeft->Complement();
  958. fRight->Complement();
  959. }
  960. void
  961. Operator::CalculateScore(Index &index)
  962. {
  963. fLeft->CalculateScore(index);
  964. fRight->CalculateScore(index);
  965. }
  966. int32
  967. Operator::Score() const
  968. {
  969. if (fOp == OP_AND) {
  970. // return the one with the better score
  971. if (fRight->Score() > fLeft->Score())
  972. return fRight->Score();
  973. return fLeft->Score();
  974. }
  975. // for OP_OR, be honest, and return the one with the worse score
  976. if (fRight->Score() < fLeft->Score())
  977. return fRight->Score();
  978. return fLeft->Score();
  979. }
  980. status_t
  981. Operator::InitCheck()
  982. {
  983. if (fOp != OP_AND && fOp != OP_OR
  984. || fLeft == NULL || fLeft->InitCheck() < B_OK
  985. || fRight == NULL || fRight->InitCheck() < B_OK)
  986. return B_ERROR;
  987. return B_OK;
  988. }
  #if 0
  // Compiled out by the surrounding "#if 0". Creates a deep copy of this
  // sub-tree and returns NULL if any of the allocations fails.
  Term *
  Operator::Copy() const
  {
      // a leaf: duplicate the equation and wrap it into a new term
      if (fEquation != NULL) {
          Equation *equationCopy = new Equation(*fEquation);
          if (equationCopy == NULL)
              return NULL;

          Term *leaf = new Term(equationCopy);
          if (leaf == NULL)
              delete equationCopy;

          return leaf;
      }

      Term *leftCopy = NULL;
      if (fLeft != NULL) {
          leftCopy = fLeft->Copy();
          if (leftCopy == NULL)
              return NULL;
      }

      Term *rightCopy = NULL;
      if (fRight != NULL) {
          rightCopy = fRight->Copy();
          if (rightCopy == NULL) {
              delete leftCopy;
              return NULL;
          }
      }

      Term *node = new Term(leftCopy, fOp, rightCopy);
      if (node == NULL) {
          // don't leak the copies that have already been made
          delete leftCopy;
          delete rightCopy;
      }

      return node;
  }
  #endif
  1018. // #pragma mark -
  1019. #ifdef DEBUG
  1020. void
  1021. Operator::PrintToStream()
  1022. {
  1023. D(__out("( "));
  1024. if (fLeft != NULL)
  1025. fLeft->PrintToStream();
  1026. char *op;
  1027. switch (fOp) {
  1028. case OP_OR: op = "OR"; break;
  1029. case OP_AND: op = "AND"; break;
  1030. default: op = "?"; break;
  1031. }
  1032. D(__out(" %s ",op));
  1033. if (fRight != NULL)
  1034. fRight->PrintToStream();
  1035. D(__out(" )"));
  1036. }
  1037. void
  1038. Equation::PrintToStream()
  1039. {
  1040. char *symbol = "???";
  1041. switch (fOp) {
  1042. case OP_EQUAL: symbol = "=="; break;
  1043. case OP_UNEQUAL: symbol = "!="; break;
  1044. case OP_GREATER_THAN: symbol = ">"; break;
  1045. case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break;
  1046. case OP_LESS_THAN: symbol = "<"; break;
  1047. case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break;
  1048. }
  1049. D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString));
  1050. }
  1051. #endif /* DEBUG */
  1052. // #pragma mark -
  1053. Expression::Expression(char *expr)
  1054. {
  1055. if (expr == NULL)
  1056. return;
  1057. fTerm = ParseOr(&expr);
  1058. if (fTerm != NULL && fTerm->InitCheck() < B_OK) {
  1059. FATAL(("Corrupt tree in expression!\n"));
  1060. delete fTerm;
  1061. fTerm = NULL;
  1062. }
  1063. D(if (fTerm != NULL) {
  1064. fTerm->PrintToStream();
  1065. D(__out("\n"));
  1066. if (*expr != '\0')
  1067. PRINT(("Unexpected end of string: \"%s\"!\n", expr));
  1068. });
  1069. fPosition = expr;
  1070. }
  1071. Expression::~Expression()
  1072. {
  1073. delete fTerm;
  1074. }
  1075. Term *
  1076. Expression::ParseEquation(char **expr)
  1077. {
  1078. skipWhitespace(expr);
  1079. bool _not = false;
  1080. if (**expr == '!') {
  1081. skipWhitespace(expr, 1);
  1082. if (**expr != '(')
  1083. return NULL;
  1084. _not = true;
  1085. }
  1086. if (**expr == ')') {
  1087. // shouldn't be handled here
  1088. return NULL;
  1089. } else if (**expr == '(') {
  1090. skipWhitespace(expr, 1);
  1091. Term *term = ParseOr(expr);
  1092. skipWhitespace(expr);
  1093. if (**expr != ')') {
  1094. delete term;
  1095. return NULL;
  1096. }
  1097. // If the term is negated, we just complement the tree, to get
  1098. // rid of the not, a.k.a. DeMorgan's Law.
  1099. if (_not)
  1100. term->Complement();
  1101. skipWhitespace(expr, 1);
  1102. return term;
  1103. }
  1104. Equation *equation = new Equation(expr);
  1105. if (equation == NULL || equation->InitCheck() < B_OK) {
  1106. delete equation;
  1107. return NULL;
  1108. }
  1109. return equation;
  1110. }
  1111. Term *
  1112. Expression::ParseAnd(char **expr)
  1113. {
  1114. Term *left = ParseEquation(expr);
  1115. if (left == NULL)
  1116. return NULL;
  1117. while (IsOperator(expr,'&')) {
  1118. Term *right = ParseAnd(expr);
  1119. Term *newParent = NULL;
  1120. if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) {
  1121. delete left;
  1122. delete right;
  1123. return NULL;
  1124. }
  1125. left = newParent;
  1126. }
  1127. return left;
  1128. }
  1129. Term *
  1130. Expression::ParseOr(char **expr)
  1131. {
  1132. Term *left = ParseAnd(expr);
  1133. if (left == NULL)
  1134. return NULL;
  1135. while (IsOperator(expr,'|')) {
  1136. Term *right = ParseAnd(expr);
  1137. Term *newParent = NULL;
  1138. if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) {
  1139. delete left;
  1140. delete right;
  1141. return NULL;
  1142. }
  1143. left = newParent;
  1144. }
  1145. return left;
  1146. }
  1147. bool
  1148. Expression::IsOperator(char **expr, char op)
  1149. {
  1150. char *string = *expr;
  1151. if (*string == op && *(string + 1) == op) {
  1152. *expr += 2;
  1153. return true;
  1154. }
  1155. return false;
  1156. }
  1157. status_t
  1158. Expression::InitCheck()
  1159. {
  1160. if (fTerm == NULL)
  1161. return B_BAD_VALUE;
  1162. return B_OK;
  1163. }
  1164. // #pragma mark -
  1165. Query::Query(Volume *volume, Expression *expression, uint32 flags)
  1166. :
  1167. fVolume(volume),
  1168. fExpression(expression),
  1169. fCurrent(NULL),
  1170. fIterator(NULL),
  1171. fIndex(volume),
  1172. fFlags(flags),
  1173. fPort(-1)
  1174. {
  1175. // if the expression has a valid root pointer, the whole tree has
  1176. // already passed the sanity check, so that we don't have to check
  1177. // every pointer
  1178. if (volume == NULL || expression == NULL || expression->Root() == NULL)
  1179. return;
  1180. // create index on the stack and delete it afterwards
  1181. fExpression->Root()->CalculateScore(fIndex);
  1182. fIndex.Unset();
  1183. Stack<Term *> stack;
  1184. stack.Push(fExpression->Root());
  1185. Term *term;
  1186. while (stack.Pop(&term)) {
  1187. if (term->Op() < OP_EQUATION) {
  1188. Operator *op = (Operator *)term;
  1189. if (op->Op() == OP_OR) {
  1190. stack.Push(op->Left());
  1191. stack.Push(op->Right());
  1192. } else {
  1193. // For OP_AND, we can use the scoring system to decide which path to add
  1194. if (op->Right()->Score() > op->Left()->Score())
  1195. stack.Push(op->Right());
  1196. else
  1197. stack.Push(op->Left());
  1198. }
  1199. } else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK)
  1200. FATAL(("Unknown term on stack or stack error"));
  1201. }
  1202. if (fFlags & B_LIVE_QUERY)
  1203. volume->AddQuery(this);
  1204. }
  1205. Query::~Query()
  1206. {
  1207. if (fFlags & B_LIVE_QUERY)
  1208. fVolume->RemoveQuery(this);
  1209. }
  1210. status_t
  1211. Query::GetNextEntry(struct dirent *dirent, size_t size)
  1212. {
  1213. // If we don't have an equation to use yet/anymore, get a new one
  1214. // from the stack
  1215. while (true) {
  1216. if (fIterator == NULL) {
  1217. if (!fStack.Pop(&fCurrent)
  1218. || fCurrent == NULL
  1219. || fCurrent->PrepareQuery(fVolume, fIndex, &fIterator,
  1220. false/*fFlags & B_QUERY_NON_INDEXED*/) < B_OK)
  1221. return B_ENTRY_NOT_FOUND;
  1222. }
  1223. if (fCurrent == NULL)
  1224. RETURN_ERROR(B_ERROR);
  1225. status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size);
  1226. if (status < B_OK) {
  1227. delete fIterator;
  1228. fIterator = NULL;
  1229. fCurrent = NULL;
  1230. } else {
  1231. // only return if we have another entry
  1232. return B_OK;
  1233. }
  1234. }
  1235. }
  1236. void
  1237. Query::SetLiveMode(port_id port, int32 token)
  1238. {
  1239. fPort = port;
  1240. fToken = token;
  1241. if ((fFlags & B_LIVE_QUERY) == 0) {
  1242. // you can decide at any point to set the live query mode,
  1243. // only live queries have to be updated by attribute changes
  1244. fFlags |= B_LIVE_QUERY;
  1245. fVolume->AddQuery(this);
  1246. }
  1247. }
  1248. void
  1249. Query::LiveUpdate(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey,
  1250. size_t oldLength, const uint8 *newKey, size_t newLength)
  1251. {
  1252. if (fPort < 0 || fExpression == NULL || attribute == NULL)
  1253. return;
  1254. // ToDo: check if the attribute is part of the query at all...
  1255. status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, oldKey, oldLength);
  1256. status_t newStatus = fExpression->Root()->Match(inode, attribute, type, newKey, newLength);
  1257. int32 op;
  1258. if (oldStatus == MATCH_OK && newStatus == MATCH_OK) {
  1259. // only send out a notification if the name was changed
  1260. if (oldKey == NULL || strcmp(attribute, "name"))
  1261. return;
  1262. send_notification(fPort, fToken, B_QUERY_UPDATE, B_ENTRY_REMOVED, fVolume->ID(), 0,
  1263. fVolume->ToVnode(inode->Parent()), 0, inode->ID(), (const char *)oldKey);
  1264. op = B_ENTRY_CREATED;
  1265. } else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) {
  1266. // nothing has changed
  1267. return;
  1268. } else if (oldStatus == MATCH_OK && newStatus != MATCH_OK)
  1269. op = B_ENTRY_REMOVED;
  1270. else
  1271. op = B_ENTRY_CREATED;
  1272. // if "value" is NULL, send_notification() crashes...
  1273. const char *value = (const char *)newKey;
  1274. if (type != B_STRING_TYPE || value == NULL)
  1275. value = "";
  1276. send_notification(fPort, fToken, B_QUERY_UPDATE, op, fVolume->ID(), 0,
  1277. fVolume->ToVnode(inode->Parent()), 0, inode->ID(), value);
  1278. }