PageRenderTime 48ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/db/query.cpp

https://github.com/phildarnowsky/mongo
C++ | 912 lines | 732 code | 86 blank | 94 comment | 178 complexity | 34d61735b4826a6e87f38116d3c46321 MD5 | raw file
  1. // query.cpp
  2. /**
  3. * Copyright (C) 2008 10gen Inc.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License, version 3,
  7. * as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU Affero General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Affero General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "stdafx.h"
  18. #include "query.h"
  19. #include "pdfile.h"
  20. #include "jsobjmanipulator.h"
  21. #include "../util/builder.h"
  22. #include <time.h>
  23. #include "introspect.h"
  24. #include "btree.h"
  25. #include "../util/lruishmap.h"
  26. #include "json.h"
  27. #include "repl.h"
  28. #include "replset.h"
  29. #include "scanandorder.h"
  30. #include "security.h"
  31. #include "curop.h"
  32. #include "commands.h"
  33. #include "queryoptimizer.h"
  34. #include "lasterror.h"
  35. namespace mongo {
  36. /* We cut off further objects once we cross this threshold; thus, you might get
  37. a little bit more than this, it is a threshold rather than a limit.
  38. */
  39. const int MaxBytesToReturnToClientAtOnce = 4 * 1024 * 1024;
  40. //ns->query->DiskLoc
  41. // LRUishMap<BSONObj,DiskLoc,5> lrutest(123);
  42. extern bool useCursors;
  43. extern bool useHints;
  44. // Just try to identify best plan.
  45. class DeleteOp : public QueryOp {
  46. public:
  47. DeleteOp( bool justOne, int& bestCount ) :
  48. justOne_( justOne ),
  49. count_(),
  50. bestCount_( bestCount ),
  51. nScanned_() {
  52. }
  53. virtual void init() {
  54. c_ = qp().newCursor();
  55. matcher_.reset( new CoveredIndexMatcher( qp().query(), qp().indexKey() ) );
  56. }
  57. virtual void next() {
  58. if ( !c_->ok() ) {
  59. setComplete();
  60. return;
  61. }
  62. DiskLoc rloc = c_->currLoc();
  63. if ( matcher_->matches(c_->currKey(), rloc ) ) {
  64. if ( !c_->getsetdup(rloc) )
  65. ++count_;
  66. }
  67. c_->advance();
  68. ++nScanned_;
  69. if ( count_ > bestCount_ )
  70. bestCount_ = count_;
  71. if ( count_ > 0 ) {
  72. if ( justOne_ )
  73. setComplete();
  74. else if ( nScanned_ >= 100 && count_ == bestCount_ )
  75. setComplete();
  76. }
  77. }
  78. virtual bool mayRecordPlan() const { return !justOne_; }
  79. virtual QueryOp *clone() const {
  80. return new DeleteOp( justOne_, bestCount_ );
  81. }
  82. auto_ptr< Cursor > newCursor() const { return qp().newCursor(); }
  83. private:
  84. bool justOne_;
  85. int count_;
  86. int &bestCount_;
  87. long long nScanned_;
  88. auto_ptr< Cursor > c_;
  89. auto_ptr< CoveredIndexMatcher > matcher_;
  90. };
  91. /* ns: namespace, e.g. <database>.<collection>
  92. pattern: the "where" clause / criteria
  93. justOne: stop after 1 match
  94. */
  95. int deleteObjects(const char *ns, BSONObj pattern, bool justOne, bool logop, bool god) {
  96. if( !god ) {
  97. if ( strstr(ns, ".system.") ) {
  98. /* note a delete from system.indexes would corrupt the db
  99. if done here, as there are pointers into those objects in
  100. NamespaceDetails.
  101. */
  102. uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) );
  103. }
  104. if ( strchr( ns , '$' ) ){
  105. log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
  106. uassert( 10100 , "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 );
  107. }
  108. }
  109. NamespaceDetails *d = nsdetails( ns );
  110. if ( ! d )
  111. return 0;
  112. uassert( 10101 , "can't remove from a capped collection" , ! d->capped );
  113. int nDeleted = 0;
  114. QueryPlanSet s( ns, pattern, BSONObj() );
  115. int best = 0;
  116. DeleteOp original( justOne, best );
  117. shared_ptr< DeleteOp > bestOp = s.runOp( original );
  118. auto_ptr< Cursor > creal = bestOp->newCursor();
  119. if( !creal->ok() )
  120. return nDeleted;
  121. CoveredIndexMatcher matcher(pattern, creal->indexKeyPattern());
  122. auto_ptr<ClientCursor> cc( new ClientCursor(creal, ns, false) );
  123. cc->setDoingDeletes( true );
  124. CursorId id = cc->cursorid;
  125. unsigned long long nScanned = 0;
  126. do {
  127. if ( ++nScanned % 128 == 0 && !matcher.docMatcher().atomic() ) {
  128. if ( ! cc->yield() ){
  129. cc.release(); // has already been deleted elsewhere
  130. break;
  131. }
  132. }
  133. // this way we can avoid calling updateLocation() every time (expensive)
  134. // as well as some other nuances handled
  135. cc->setDoingDeletes( true );
  136. DiskLoc rloc = cc->c->currLoc();
  137. BSONObj key = cc->c->currKey();
  138. cc->c->advance();
  139. if ( ! matcher.matches( key , rloc ) )
  140. continue;
  141. assert( !cc->c->getsetdup(rloc) ); // can't be a dup, we deleted it!
  142. if ( !justOne ) {
  143. /* NOTE: this is SLOW. this is not good, noteLocation() was designed to be called across getMore
  144. blocks. here we might call millions of times which would be bad.
  145. */
  146. cc->c->noteLocation();
  147. }
  148. if ( logop ) {
  149. BSONElement e;
  150. if( BSONObj( rloc.rec() ).getObjectID( e ) ) {
  151. BSONObjBuilder b;
  152. b.append( e );
  153. bool replJustOne = true;
  154. logOp( "d", ns, b.done(), 0, &replJustOne );
  155. } else {
  156. problem() << "deleted object without id, not logging" << endl;
  157. }
  158. }
  159. theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
  160. nDeleted++;
  161. if ( justOne )
  162. break;
  163. cc->c->checkLocation();
  164. } while ( cc->c->ok() );
  165. if ( cc.get() && ClientCursor::find( id , false ) == 0 ){
  166. cc.release();
  167. }
  168. return nDeleted;
  169. }
  170. int otherTraceLevel = 0;
  171. int initialExtentSize(int len);
  172. bool runCommands(const char *ns, BSONObj& jsobj, CurOp& curop, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) {
  173. try {
  174. return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions);
  175. }
  176. catch ( AssertionException& e ) {
  177. if ( !e.msg.empty() )
  178. anObjBuilder.append("assertion", e.msg);
  179. }
  180. curop.debug().str << " assertion ";
  181. anObjBuilder.append("errmsg", "db assertion failure");
  182. anObjBuilder.append("ok", 0.0);
  183. BSONObj x = anObjBuilder.done();
  184. b.append((void*) x.objdata(), x.objsize());
  185. return true;
  186. }
  187. int nCaught = 0;
  188. void killCursors(int n, long long *ids) {
  189. int k = 0;
  190. for ( int i = 0; i < n; i++ ) {
  191. if ( ClientCursor::erase(ids[i]) )
  192. k++;
  193. }
  194. log( k == n ) << "killcursors: found " << k << " of " << n << '\n';
  195. }
  196. BSONObj id_obj = fromjson("{\"_id\":ObjectId( \"000000000000000000000000\" )}");
  197. BSONObj empty_obj = fromjson("{}");
  198. /* This is for languages whose "objects" are not well ordered (JSON is well ordered).
  199. [ { a : ... } , { b : ... } ] -> { a : ..., b : ... }
  200. */
  201. inline BSONObj transformOrderFromArrayFormat(BSONObj order) {
  202. /* note: this is slow, but that is ok as order will have very few pieces */
  203. BSONObjBuilder b;
  204. char p[2] = "0";
  205. while ( 1 ) {
  206. BSONObj j = order.getObjectField(p);
  207. if ( j.isEmpty() )
  208. break;
  209. BSONElement e = j.firstElement();
  210. uassert( 10102 , "bad order array", !e.eoo());
  211. uassert( 10103 , "bad order array [2]", e.isNumber());
  212. b.append(e);
  213. (*p)++;
  214. uassert( 10104 , "too many ordering elements", *p <= '9');
  215. }
  216. return b.obj();
  217. }
  218. //int dump = 0;
  219. /* empty result for error conditions */
  220. QueryResult* emptyMoreResult(long long cursorid) {
  221. BufBuilder b(32768);
  222. b.skip(sizeof(QueryResult));
  223. QueryResult *qr = (QueryResult *) b.buf();
  224. qr->cursorId = 0; // 0 indicates no more data to retrieve.
  225. qr->startingFrom = 0;
  226. qr->len = b.len();
  227. qr->setOperation(opReply);
  228. qr->nReturned = 0;
  229. b.decouple();
  230. return qr;
  231. }
  232. QueryResult* getMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop ) {
  233. StringBuilder& ss = curop.debug().str;
  234. ClientCursor::Pointer p(cursorid);
  235. ClientCursor *cc = p._c;
  236. int bufSize = 512;
  237. if ( cc ){
  238. bufSize += sizeof( QueryResult );
  239. bufSize += ( ntoreturn ? 4 : 1 ) * 1024 * 1024;
  240. }
  241. BufBuilder b( bufSize );
  242. b.skip(sizeof(QueryResult));
  243. int resultFlags = 0; //QueryResult::ResultFlag_AwaitCapable;
  244. int start = 0;
  245. int n = 0;
  246. if ( !cc ) {
  247. log() << "getMore: cursorid not found " << ns << " " << cursorid << endl;
  248. cursorid = 0;
  249. resultFlags = QueryResult::ResultFlag_CursorNotFound;
  250. }
  251. else {
  252. ss << " query: " << cc->query << " ";
  253. start = cc->pos;
  254. Cursor *c = cc->c.get();
  255. c->checkLocation();
  256. while ( 1 ) {
  257. if ( !c->ok() ) {
  258. if ( c->tailable() ) {
  259. if ( c->advance() ) {
  260. continue;
  261. }
  262. break;
  263. }
  264. p.release();
  265. bool ok = ClientCursor::erase(cursorid);
  266. assert(ok);
  267. cursorid = 0;
  268. cc = 0;
  269. break;
  270. }
  271. if ( !cc->matcher->matches(c->currKey(), c->currLoc() ) ) {
  272. }
  273. else {
  274. //out() << "matches " << c->currLoc().toString() << '\n';
  275. if( c->getsetdup(c->currLoc()) ) {
  276. //out() << " but it's a dup \n";
  277. }
  278. else {
  279. BSONObj js = c->current();
  280. fillQueryResultFromObj(b, cc->filter.get(), js);
  281. n++;
  282. if ( (ntoreturn>0 && (n >= ntoreturn || b.len() > MaxBytesToReturnToClientAtOnce)) ||
  283. (ntoreturn==0 && b.len()>1*1024*1024) ) {
  284. c->advance();
  285. cc->pos += n;
  286. //cc->updateLocation();
  287. break;
  288. }
  289. }
  290. }
  291. c->advance();
  292. }
  293. if ( cc ) {
  294. cc->updateLocation();
  295. cc->mayUpgradeStorage();
  296. }
  297. }
  298. QueryResult *qr = (QueryResult *) b.buf();
  299. qr->len = b.len();
  300. qr->setOperation(opReply);
  301. qr->_resultFlags() = resultFlags;
  302. qr->cursorId = cursorid;
  303. qr->startingFrom = start;
  304. qr->nReturned = n;
  305. b.decouple();
  306. return qr;
  307. }
  308. class CountOp : public QueryOp {
  309. public:
  310. CountOp( const BSONObj &spec ) : spec_( spec ), count_(), bc_() {}
  311. virtual void init() {
  312. query_ = spec_.getObjectField( "query" );
  313. c_ = qp().newCursor();
  314. matcher_.reset( new CoveredIndexMatcher( query_, c_->indexKeyPattern() ) );
  315. if ( qp().exactKeyMatch() && ! matcher_->needRecord() ) {
  316. query_ = qp().simplifiedQuery( qp().indexKey() );
  317. bc_ = dynamic_cast< BtreeCursor* >( c_.get() );
  318. bc_->forgetEndKey();
  319. }
  320. skip_ = spec_["skip"].numberLong();
  321. limit_ = spec_["limit"].numberLong();
  322. }
  323. virtual void next() {
  324. if ( !c_->ok() ) {
  325. setComplete();
  326. return;
  327. }
  328. if ( bc_ ) {
  329. if ( firstMatch_.isEmpty() ) {
  330. firstMatch_ = bc_->currKeyNode().key;
  331. // if not match
  332. if ( query_.woCompare( firstMatch_, BSONObj(), false ) ) {
  333. setComplete();
  334. return;
  335. }
  336. _gotOne();
  337. } else {
  338. if ( !firstMatch_.woEqual( bc_->currKeyNode().key ) ) {
  339. setComplete();
  340. return;
  341. }
  342. _gotOne();
  343. }
  344. } else {
  345. if ( !matcher_->matches(c_->currKey(), c_->currLoc() ) ) {
  346. }
  347. else if( !c_->getsetdup(c_->currLoc()) ) {
  348. _gotOne();
  349. }
  350. }
  351. c_->advance();
  352. }
  353. virtual QueryOp *clone() const {
  354. return new CountOp( spec_ );
  355. }
  356. long long count() const { return count_; }
  357. virtual bool mayRecordPlan() const { return true; }
  358. private:
  359. void _gotOne(){
  360. if ( skip_ ){
  361. skip_--;
  362. return;
  363. }
  364. if ( limit_ > 0 && count_ >= limit_ ){
  365. setComplete();
  366. return;
  367. }
  368. count_++;
  369. }
  370. BSONObj spec_;
  371. long long count_;
  372. long long skip_;
  373. long long limit_;
  374. auto_ptr< Cursor > c_;
  375. BSONObj query_;
  376. BtreeCursor *bc_;
  377. auto_ptr< CoveredIndexMatcher > matcher_;
  378. BSONObj firstMatch_;
  379. };
  380. /* { count: "collectionname"[, query: <query>] }
  381. returns -1 on ns does not exist error.
  382. */
  383. long long runCount( const char *ns, const BSONObj &cmd, string &err ) {
  384. NamespaceDetails *d = nsdetails( ns );
  385. if ( !d ) {
  386. err = "ns missing";
  387. return -1;
  388. }
  389. BSONObj query = cmd.getObjectField("query");
  390. // count of all objects
  391. if ( query.isEmpty() ){
  392. long long num = d->nrecords;
  393. num = num - cmd["skip"].numberLong();
  394. if ( num < 0 ) {
  395. num = 0;
  396. }
  397. if ( cmd["limit"].isNumber() ){
  398. long long limit = cmd["limit"].numberLong();
  399. if ( limit < num ){
  400. num = limit;
  401. }
  402. }
  403. return num;
  404. }
  405. QueryPlanSet qps( ns, query, BSONObj() );
  406. CountOp original( cmd );
  407. shared_ptr< CountOp > res = qps.runOp( original );
  408. if ( !res->complete() ) {
  409. log() << "Count with ns: " << ns << " and query: " << query
  410. << " failed with exception: " << res->exceptionMessage()
  411. << endl;
  412. return 0;
  413. }
  414. return res->count();
  415. }
  416. // Implements database 'query' requests using the query optimizer's QueryOp interface
  417. class UserQueryOp : public QueryOp {
  418. public:
  419. UserQueryOp( int ntoskip, int ntoreturn, const BSONObj &order, bool wantMore,
  420. bool explain, FieldMatcher *filter, int queryOptions ) :
  421. b_( 32768 ),
  422. ntoskip_( ntoskip ),
  423. ntoreturn_( ntoreturn ),
  424. order_( order ),
  425. wantMore_( wantMore ),
  426. explain_( explain ),
  427. filter_( filter ),
  428. ordering_(),
  429. nscanned_(),
  430. queryOptions_( queryOptions ),
  431. n_(),
  432. soSize_(),
  433. saveClientCursor_(),
  434. findingStart_( (queryOptions & QueryOption_OplogReplay) != 0 ),
  435. findingStartCursor_()
  436. {
  437. uassert( 10105 , "bad skip value in query", ntoskip >= 0);
  438. }
  439. virtual void init() {
  440. b_.skip( sizeof( QueryResult ) );
  441. // findingStart mode is used to find the first operation of interest when
  442. // we are scanning through a repl log. For efficiency in the common case,
  443. // where the first operation of interest is closer to the tail than the head,
  444. // we start from the tail of the log and work backwards until we find the
  445. // first operation of interest. Then we scan forward from that first operation,
  446. // actually returning results to the client. During the findingStart phase,
  447. // we release the db mutex occasionally to avoid blocking the db process for
  448. // an extended period of time.
  449. if ( findingStart_ ) {
  450. // Use a ClientCursor here so we can release db mutex while scanning
  451. // oplog (can take quite a while with large oplogs).
  452. auto_ptr<Cursor> c = qp().newReverseCursor();
  453. findingStartCursor_ = new ClientCursor(c, qp().ns(), false);
  454. } else {
  455. c_ = qp().newCursor();
  456. }
  457. matcher_.reset(new CoveredIndexMatcher(qp().query(), qp().indexKey()));
  458. if ( qp().scanAndOrderRequired() ) {
  459. ordering_ = true;
  460. so_.reset( new ScanAndOrder( ntoskip_, ntoreturn_, order_ ) );
  461. wantMore_ = false;
  462. }
  463. }
  464. virtual void next() {
  465. if ( findingStart_ ) {
  466. if ( !findingStartCursor_ || !findingStartCursor_->c->ok() ) {
  467. findingStart_ = false;
  468. c_ = qp().newCursor();
  469. } else if ( !matcher_->matches( findingStartCursor_->c->currKey(), findingStartCursor_->c->currLoc() ) ) {
  470. findingStart_ = false;
  471. c_ = qp().newCursor( findingStartCursor_->c->currLoc() );
  472. } else {
  473. findingStartCursor_->c->advance();
  474. RARELY {
  475. CursorId id = findingStartCursor_->cursorid;
  476. findingStartCursor_->updateLocation();
  477. {
  478. dbtemprelease t;
  479. }
  480. findingStartCursor_ = ClientCursor::find( id, false );
  481. }
  482. return;
  483. }
  484. }
  485. if ( findingStartCursor_ ) {
  486. ClientCursor::erase( findingStartCursor_->cursorid );
  487. findingStartCursor_ = 0;
  488. }
  489. if ( !c_->ok() ) {
  490. finish();
  491. return;
  492. }
  493. bool mayCreateCursor1 = wantMore_ && ntoreturn_ != 1 && useCursors;
  494. if( 0 ) {
  495. BSONObj js = c_->current();
  496. cout << "SCANNING " << js << endl;
  497. }
  498. nscanned_++;
  499. if ( !matcher_->matches(c_->currKey(), c_->currLoc() ) ) {
  500. ;
  501. }
  502. else {
  503. DiskLoc cl = c_->currLoc();
  504. if( !c_->getsetdup(cl) ) {
  505. BSONObj js = c_->current();
  506. // got a match.
  507. assert( js.objsize() >= 0 ); //defensive for segfaults
  508. if ( ordering_ ) {
  509. // note: no cursors for non-indexed, ordered results. results must be fairly small.
  510. so_->add(js);
  511. }
  512. else if ( ntoskip_ > 0 ) {
  513. ntoskip_--;
  514. } else {
  515. if ( explain_ ) {
  516. n_++;
  517. if ( n_ >= ntoreturn_ && !wantMore_ ) {
  518. // .limit() was used, show just that much.
  519. finish();
  520. return;
  521. }
  522. }
  523. else {
  524. fillQueryResultFromObj(b_, filter_, js);
  525. n_++;
  526. if ( (ntoreturn_>0 && (n_ >= ntoreturn_ || b_.len() > MaxBytesToReturnToClientAtOnce)) ||
  527. (ntoreturn_==0 && (b_.len()>1*1024*1024 || n_>=101)) ) {
  528. /* if ntoreturn is zero, we return up to 101 objects. on the subsequent getmore, there
  529. is only a size limit. The idea is that on a find() where one doesn't use much results,
  530. we don't return much, but once getmore kicks in, we start pushing significant quantities.
  531. The n limit (vs. size) is important when someone fetches only one small field from big
  532. objects, which causes massive scanning server-side.
  533. */
  534. /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */
  535. if ( mayCreateCursor1 ) {
  536. c_->advance();
  537. if ( c_->ok() ) {
  538. // more...so save a cursor
  539. saveClientCursor_ = true;
  540. }
  541. }
  542. finish();
  543. return;
  544. }
  545. }
  546. }
  547. }
  548. }
  549. c_->advance();
  550. }
  551. void finish() {
  552. if ( explain_ ) {
  553. n_ = ordering_ ? so_->size() : n_;
  554. } else if ( ordering_ ) {
  555. so_->fill(b_, filter_, n_);
  556. }
  557. if ( mayCreateCursor2() ) {
  558. c_->setTailable();
  559. }
  560. // If the tailing request succeeded.
  561. if ( c_->tailable() ) {
  562. saveClientCursor_ = true;
  563. }
  564. setComplete();
  565. }
  566. virtual bool mayRecordPlan() const { return ntoreturn_ != 1; }
  567. virtual QueryOp *clone() const {
  568. return new UserQueryOp( ntoskip_, ntoreturn_, order_, wantMore_, explain_, filter_, queryOptions_ );
  569. }
  570. BufBuilder &builder() { return b_; }
  571. bool scanAndOrderRequired() const { return ordering_; }
  572. auto_ptr< Cursor > cursor() { return c_; }
  573. auto_ptr< CoveredIndexMatcher > matcher() { return matcher_; }
  574. int n() const { return n_; }
  575. long long nscanned() const { return nscanned_; }
  576. bool saveClientCursor() const { return saveClientCursor_; }
  577. bool mayCreateCursor2() const { return ( queryOptions_ & QueryOption_CursorTailable ) && ntoreturn_ != 1; }
  578. private:
  579. BufBuilder b_;
  580. int ntoskip_;
  581. int ntoreturn_;
  582. BSONObj order_;
  583. bool wantMore_;
  584. bool explain_;
  585. FieldMatcher *filter_;
  586. bool ordering_;
  587. auto_ptr< Cursor > c_;
  588. long long nscanned_;
  589. int queryOptions_;
  590. auto_ptr< CoveredIndexMatcher > matcher_;
  591. int n_;
  592. int soSize_;
  593. bool saveClientCursor_;
  594. auto_ptr< ScanAndOrder > so_;
  595. bool findingStart_;
  596. ClientCursor * findingStartCursor_;
  597. };
  598. /* run a query -- includes checking for and running a Command */
  599. auto_ptr< QueryResult > runQuery(Message& m, QueryMessage& q, CurOp& curop ) {
  600. StringBuilder& ss = curop.debug().str;
  601. const char *ns = q.ns;
  602. int ntoskip = q.ntoskip;
  603. int _ntoreturn = q.ntoreturn;
  604. BSONObj jsobj = q.query;
  605. auto_ptr< FieldMatcher > filter = q.fields; // what fields to return (unspecified = full object)
  606. int queryOptions = q.queryOptions;
  607. BSONObj snapshotHint;
  608. Timer t;
  609. if( logLevel >= 2 )
  610. log() << "runQuery: " << ns << jsobj << endl;
  611. long long nscanned = 0;
  612. bool wantMore = true;
  613. int ntoreturn = _ntoreturn;
  614. if ( _ntoreturn < 0 ) {
  615. /* _ntoreturn greater than zero is simply a hint on how many objects to send back per
  616. "cursor batch".
  617. A negative number indicates a hard limit.
  618. */
  619. ntoreturn = -_ntoreturn;
  620. wantMore = false;
  621. }
  622. ss << "query " << ns << " ntoreturn:" << ntoreturn;
  623. curop.setQuery(jsobj);
  624. BufBuilder bb;
  625. BSONObjBuilder cmdResBuf;
  626. long long cursorid = 0;
  627. bb.skip(sizeof(QueryResult));
  628. auto_ptr< QueryResult > qr;
  629. int n = 0;
  630. Client& c = cc();
  631. /* we assume you are using findOne() for running a cmd... */
  632. if ( ntoreturn == 1 && runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
  633. n = 1;
  634. qr.reset( (QueryResult *) bb.buf() );
  635. bb.decouple();
  636. qr->setResultFlagsToOk();
  637. qr->len = bb.len();
  638. ss << " reslen:" << bb.len();
  639. // qr->channel = 0;
  640. qr->setOperation(opReply);
  641. qr->cursorId = cursorid;
  642. qr->startingFrom = 0;
  643. qr->nReturned = n;
  644. }
  645. else {
  646. /* regular query */
  647. AuthenticationInfo *ai = currentClient.get()->ai;
  648. uassert( 10106 , "unauthorized", ai->isAuthorized(c.database()->name.c_str()));
  649. /* we allow queries to SimpleSlave's -- but not to the slave (nonmaster) member of a replica pair
  650. so that queries to a pair are realtime consistent as much as possible. use setSlaveOk() to
  651. query the nonmaster member of a replica pair.
  652. */
  653. uassert( 10107 , "not master", isMaster() || (queryOptions & QueryOption_SlaveOk) || slave == SimpleSlave );
  654. BSONElement hint;
  655. BSONObj min;
  656. BSONObj max;
  657. bool explain = false;
  658. bool _gotquery = false;
  659. bool snapshot = false;
  660. BSONObj query;
  661. {
  662. BSONElement e = jsobj.findElement("$query");
  663. if ( e.eoo() )
  664. e = jsobj.findElement("query");
  665. if ( !e.eoo() && (e.type() == Object || e.type() == Array) ) {
  666. query = e.embeddedObject();
  667. _gotquery = true;
  668. }
  669. }
  670. BSONObj order;
  671. {
  672. BSONElement e = jsobj.findElement("$orderby");
  673. if ( e.eoo() )
  674. e = jsobj.findElement("orderby");
  675. if ( !e.eoo() ) {
  676. order = e.embeddedObjectUserCheck();
  677. if ( e.type() == Array )
  678. order = transformOrderFromArrayFormat(order);
  679. }
  680. }
  681. if ( !_gotquery && order.isEmpty() )
  682. query = jsobj;
  683. else {
  684. explain = jsobj.getBoolField("$explain");
  685. if ( useHints )
  686. hint = jsobj.getField("$hint");
  687. min = jsobj.getObjectField("$min");
  688. max = jsobj.getObjectField("$max");
  689. BSONElement e = jsobj.getField("$snapshot");
  690. snapshot = !e.eoo() && e.trueValue();
  691. if( snapshot ) {
  692. uassert( 12001 , "E12001 can't sort with $snapshot", order.isEmpty());
  693. uassert( 12002 , "E12002 can't use hint with $snapshot", hint.eoo());
  694. NamespaceDetails *d = nsdetails(ns);
  695. if ( d ){
  696. int i = d->findIdIndex();
  697. if( i < 0 ) {
  698. if ( strstr( ns , ".system." ) == 0 )
  699. log() << "warning: no _id index on $snapshot query, ns:" << ns << endl;
  700. }
  701. else {
  702. /* [dm] the name of an _id index tends to vary, so we build the hint the hard way here.
  703. probably need a better way to specify "use the _id index" as a hint. if someone is
  704. in the query optimizer please fix this then!
  705. */
  706. BSONObjBuilder b;
  707. b.append("$hint", d->idx(i).indexName());
  708. snapshotHint = b.obj();
  709. hint = snapshotHint.firstElement();
  710. }
  711. }
  712. }
  713. }
  714. /* The ElemIter will not be happy if this isn't really an object. So throw exception
  715. here when that is true.
  716. (Which may indicate bad data from client.)
  717. */
  718. if ( query.objsize() == 0 ) {
  719. out() << "Bad query object?\n jsobj:";
  720. out() << jsobj.toString() << "\n query:";
  721. out() << query.toString() << endl;
  722. uassert( 10110 , "bad query object", false);
  723. }
  724. bool idHackWorked = false;
  725. if ( strcmp( query.firstElement().fieldName() , "_id" ) == 0 && query.nFields() == 1 && query.firstElement().isSimpleType() ){
  726. nscanned = 1;
  727. bool nsFound = false;
  728. bool indexFound = false;
  729. BSONObj resObject;
  730. bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound );
  731. if ( nsFound == false || indexFound == true ){
  732. idHackWorked = true;
  733. if ( found ){
  734. n = 1;
  735. fillQueryResultFromObj( bb , filter.get() , resObject );
  736. }
  737. qr.reset( (QueryResult *) bb.buf() );
  738. bb.decouple();
  739. qr->setResultFlagsToOk();
  740. qr->len = bb.len();
  741. ss << " reslen:" << bb.len();
  742. qr->setOperation(opReply);
  743. qr->cursorId = cursorid;
  744. qr->startingFrom = 0;
  745. qr->nReturned = n;
  746. }
  747. }
  748. if ( ! idHackWorked ){ // non-simple _id lookup
  749. BSONObj oldPlan;
  750. if ( explain && hint.eoo() && min.isEmpty() && max.isEmpty() ) {
  751. QueryPlanSet qps( ns, query, order );
  752. if ( qps.usingPrerecordedPlan() )
  753. oldPlan = qps.explain();
  754. }
  755. QueryPlanSet qps( ns, query, order, &hint, !explain, min, max );
  756. UserQueryOp original( ntoskip, ntoreturn, order, wantMore, explain, filter.get(), queryOptions );
  757. shared_ptr< UserQueryOp > o = qps.runOp( original );
  758. UserQueryOp &dqo = *o;
  759. massert( 10362 , dqo.exceptionMessage(), dqo.complete() );
  760. n = dqo.n();
  761. nscanned = dqo.nscanned();
  762. if ( dqo.scanAndOrderRequired() )
  763. ss << " scanAndOrder ";
  764. auto_ptr< Cursor > c = dqo.cursor();
  765. log( 5 ) << " used cursor: " << c.get() << endl;
  766. if ( dqo.saveClientCursor() ) {
  767. // the clientcursor now owns the Cursor* and 'c' is released:
  768. ClientCursor *cc = new ClientCursor(c, ns, !(queryOptions & QueryOption_NoCursorTimeout));
  769. cursorid = cc->cursorid;
  770. cc->query = jsobj.getOwned();
  771. DEV out() << " query has more, cursorid: " << cursorid << endl;
  772. cc->matcher = dqo.matcher();
  773. cc->pos = n;
  774. cc->filter = filter;
  775. cc->originalMessage = m;
  776. cc->updateLocation();
  777. if ( !cc->c->ok() && cc->c->tailable() ) {
  778. DEV out() << " query has no more but tailable, cursorid: " << cursorid << endl;
  779. } else {
  780. DEV out() << " query has more, cursorid: " << cursorid << endl;
  781. }
  782. }
  783. if ( explain ) {
  784. BSONObjBuilder builder;
  785. builder.append("cursor", c->toString());
  786. builder.append("startKey", c->prettyStartKey());
  787. builder.append("endKey", c->prettyEndKey());
  788. builder.append("nscanned", double( dqo.nscanned() ) );
  789. builder.append("n", n);
  790. if ( dqo.scanAndOrderRequired() )
  791. builder.append("scanAndOrder", true);
  792. builder.append("millis", t.millis());
  793. if ( !oldPlan.isEmpty() )
  794. builder.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() );
  795. if ( hint.eoo() )
  796. builder.appendElements(qps.explain());
  797. BSONObj obj = builder.done();
  798. fillQueryResultFromObj(dqo.builder(), 0, obj);
  799. n = 1;
  800. }
  801. qr.reset( (QueryResult *) dqo.builder().buf() );
  802. dqo.builder().decouple();
  803. qr->cursorId = cursorid;
  804. qr->setResultFlagsToOk();
  805. qr->len = dqo.builder().len();
  806. ss << " reslen:" << qr->len;
  807. qr->setOperation(opReply);
  808. qr->startingFrom = 0;
  809. qr->nReturned = n;
  810. }
  811. }
  812. int duration = t.millis();
  813. Database *database = c.database();
  814. if ( (database && database->profile) || duration >= 100 ) {
  815. ss << " nscanned:" << nscanned << ' ';
  816. if ( ntoskip )
  817. ss << " ntoskip:" << ntoskip;
  818. if ( database && database->profile )
  819. ss << " \nquery: ";
  820. ss << jsobj << ' ';
  821. }
  822. ss << " nreturned:" << n;
  823. return qr;
  824. }
  825. } // namespace mongo