PageRenderTime 57ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/db/query.cpp

https://github.com/niallo/mongo
C++ | 1107 lines | 881 code | 136 blank | 90 comment | 214 complexity | 49394fdf8bd1e5f82c44f2d2d5e49054 MD5 | raw file
  1. // query.cpp
  2. /**
  3. * Copyright (C) 2008 10gen Inc.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License, version 3,
  7. * as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU Affero General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Affero General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "pch.h"
  18. #include "query.h"
  19. #include "pdfile.h"
  20. #include "jsobjmanipulator.h"
  21. #include "../bson/util/builder.h"
  22. #include <time.h>
  23. #include "introspect.h"
  24. #include "btree.h"
  25. #include "../util/lruishmap.h"
  26. #include "json.h"
  27. #include "repl.h"
  28. #include "replpair.h"
  29. #include "scanandorder.h"
  30. #include "security.h"
  31. #include "curop.h"
  32. #include "commands.h"
  33. #include "queryoptimizer.h"
  34. #include "lasterror.h"
  35. #include "../s/d_logic.h"
  36. #include "repl_block.h"
  37. namespace mongo {
  /* Soft cap on the size of a reply. Result building stops adding objects once
     the buffer has crossed this many bytes, so a reply can end up slightly
     larger than this value: it is a threshold, not a hard limit.
  */
  const int MaxBytesToReturnToClientAtOnce = 4 * 1024 * 1024;

  //ns->query->DiskLoc
  // LRUishMap<BSONObj,DiskLoc,5> lrutest(123);

  // Flags defined elsewhere. In this file useCursors gates saving a client
  // cursor after the first batch, and useHints gates honoring a query hint.
  extern bool useCursors;
  extern bool useHints;
  46. // Just try to identify best plan.
  47. class DeleteOp : public MultiCursor::CursorOp {
  48. public:
  49. DeleteOp( bool justOne, int& bestCount ) :
  50. justOne_( justOne ),
  51. count_(),
  52. bestCount_( bestCount ),
  53. _nscanned() {
  54. }
  55. virtual void _init() {
  56. c_ = qp().newCursor();
  57. }
  58. virtual bool prepareToYield() {
  59. if ( ! _cc ) {
  60. _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , qp().ns() ) );
  61. }
  62. return _cc->prepareToYield( _yieldData );
  63. }
  64. virtual void recoverFromYield() {
  65. if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
  66. _cc.reset();
  67. c_.reset();
  68. massert( 13340, "cursor dropped during delete", false );
  69. }
  70. }
  71. virtual long long nscanned() {
  72. assert( c_.get() );
  73. return c_->nscanned();
  74. }
  75. virtual void next() {
  76. if ( !c_->ok() ) {
  77. setComplete();
  78. return;
  79. }
  80. DiskLoc rloc = c_->currLoc();
  81. if ( matcher()->matches(c_->currKey(), rloc ) ) {
  82. if ( !c_->getsetdup(rloc) )
  83. ++count_;
  84. }
  85. c_->advance();
  86. _nscanned = c_->nscanned();
  87. if ( count_ > bestCount_ )
  88. bestCount_ = count_;
  89. if ( count_ > 0 ) {
  90. if ( justOne_ )
  91. setComplete();
  92. else if ( _nscanned >= 100 && count_ == bestCount_ )
  93. setComplete();
  94. }
  95. }
  96. virtual bool mayRecordPlan() const { return !justOne_; }
  97. virtual QueryOp *_createChild() const {
  98. bestCount_ = 0; // should be safe to reset this in contexts where createChild() is called
  99. return new DeleteOp( justOne_, bestCount_ );
  100. }
  101. virtual shared_ptr<Cursor> newCursor() const { return qp().newCursor(); }
  102. private:
  103. bool justOne_;
  104. int count_;
  105. int &bestCount_;
  106. long long _nscanned;
  107. shared_ptr<Cursor> c_;
  108. ClientCursor::CleanupPointer _cc;
  109. ClientCursor::YieldData _yieldData;
  110. };
  111. /* ns: namespace, e.g. <database>.<collection>
  112. pattern: the "where" clause / criteria
  113. justOne: stop after 1 match
  114. god: allow access to system namespaces, and don't yield
  115. */
  116. long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) {
  117. if( !god ) {
  118. if ( strstr(ns, ".system.") ) {
  119. /* note a delete from system.indexes would corrupt the db
  120. if done here, as there are pointers into those objects in
  121. NamespaceDetails.
  122. */
  123. uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) );
  124. }
  125. if ( strchr( ns , '$' ) ){
  126. log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
  127. uassert( 10100 , "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 );
  128. }
  129. }
  130. NamespaceDetails *d = nsdetails( ns );
  131. if ( ! d )
  132. return 0;
  133. uassert( 10101 , "can't remove from a capped collection" , ! d->capped );
  134. long long nDeleted = 0;
  135. int best = 0;
  136. shared_ptr< MultiCursor::CursorOp > opPtr( new DeleteOp( justOneOrig, best ) );
  137. shared_ptr< MultiCursor > creal( new MultiCursor( ns, pattern, BSONObj(), opPtr, true ) );
  138. if( !creal->ok() )
  139. return nDeleted;
  140. shared_ptr< Cursor > cPtr = creal;
  141. auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) );
  142. cc->setDoingDeletes( true );
  143. CursorId id = cc->cursorid;
  144. bool justOne = justOneOrig;
  145. bool canYield = !god && !creal->matcher()->docMatcher().atomic();
  146. do {
  147. if ( canYield && ! cc->yieldSometimes() ){
  148. cc.release(); // has already been deleted elsewhere
  149. // TODO should we assert or something?
  150. break;
  151. }
  152. if ( !cc->c->ok() ) {
  153. break; // if we yielded, could have hit the end
  154. }
  155. // this way we can avoid calling updateLocation() every time (expensive)
  156. // as well as some other nuances handled
  157. cc->setDoingDeletes( true );
  158. DiskLoc rloc = cc->c->currLoc();
  159. BSONObj key = cc->c->currKey();
  160. // NOTE Calling advance() may change the matcher, so it's important
  161. // to try to match first.
  162. bool match = creal->matcher()->matches( key , rloc );
  163. if ( ! cc->c->advance() )
  164. justOne = true;
  165. if ( ! match )
  166. continue;
  167. assert( !cc->c->getsetdup(rloc) ); // can't be a dup, we deleted it!
  168. if ( !justOne ) {
  169. /* NOTE: this is SLOW. this is not good, noteLocation() was designed to be called across getMore
  170. blocks. here we might call millions of times which would be bad.
  171. */
  172. cc->c->noteLocation();
  173. }
  174. if ( logop ) {
  175. BSONElement e;
  176. if( BSONObj( rloc.rec() ).getObjectID( e ) ) {
  177. BSONObjBuilder b;
  178. b.append( e );
  179. bool replJustOne = true;
  180. logOp( "d", ns, b.done(), 0, &replJustOne );
  181. } else {
  182. problem() << "deleted object without id, not logging" << endl;
  183. }
  184. }
  185. if ( rs )
  186. rs->goingToDelete( rloc.obj() /*cc->c->current()*/ );
  187. theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
  188. nDeleted++;
  189. if ( justOne ) {
  190. break;
  191. }
  192. cc->c->checkLocation();
  193. } while ( cc->c->ok() );
  194. if ( cc.get() && ClientCursor::find( id , false ) == 0 ){
  195. cc.release();
  196. }
  197. return nDeleted;
  198. }
  // Not referenced in this part of the file; presumably read elsewhere -- verify.
  int otherTraceLevel = 0;

  // Forward declaration; the definition is not in this part of the file.
  int initialExtentSize(int len);
  201. bool runCommands(const char *ns, BSONObj& jsobj, CurOp& curop, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) {
  202. try {
  203. return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions);
  204. }
  205. catch ( AssertionException& e ) {
  206. e.getInfo().append( anObjBuilder , "assertion" , "assertionCode" );
  207. }
  208. curop.debug().str << " assertion ";
  209. anObjBuilder.append("errmsg", "db assertion failure");
  210. anObjBuilder.append("ok", 0.0);
  211. BSONObj x = anObjBuilder.done();
  212. b.appendBuf((void*) x.objdata(), x.objsize());
  213. return true;
  214. }
  // Not referenced in this part of the file; presumably updated elsewhere -- verify.
  int nCaught = 0;
  216. void killCursors(int n, long long *ids) {
  217. int k = 0;
  218. for ( int i = 0; i < n; i++ ) {
  219. if ( ClientCursor::erase(ids[i]) )
  220. k++;
  221. }
  222. if ( logLevel > 0 || k != n ){
  223. log( k == n ) << "killcursors: found " << k << " of " << n << endl;
  224. }
  225. }
  226. BSONObj id_obj = fromjson("{\"_id\":1}");
  227. BSONObj empty_obj = fromjson("{}");
  228. //int dump = 0;
  229. /* empty result for error conditions */
  230. QueryResult* emptyMoreResult(long long cursorid) {
  231. BufBuilder b(32768);
  232. b.skip(sizeof(QueryResult));
  233. QueryResult *qr = (QueryResult *) b.buf();
  234. qr->cursorId = 0; // 0 indicates no more data to retrieve.
  235. qr->startingFrom = 0;
  236. qr->len = b.len();
  237. qr->setOperation(opReply);
  238. qr->nReturned = 0;
  239. b.decouple();
  240. return qr;
  241. }
  242. QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) {
  243. // log() << "TEMP GETMORE " << ns << ' ' << cursorid << ' ' << pass << endl;
  244. exhaust = false;
  245. ClientCursor::Pointer p(cursorid);
  246. ClientCursor *cc = p._c;
  247. int bufSize = 512;
  248. if ( cc ){
  249. bufSize += sizeof( QueryResult );
  250. bufSize += ( ntoreturn ? 4 : 1 ) * 1024 * 1024;
  251. }
  252. BufBuilder b( bufSize );
  253. b.skip(sizeof(QueryResult));
  254. int resultFlags = ResultFlag_AwaitCapable;
  255. int start = 0;
  256. int n = 0;
  257. if ( !cc ) {
  258. log() << "getMore: cursorid not found " << ns << " " << cursorid << endl;
  259. cursorid = 0;
  260. resultFlags = ResultFlag_CursorNotFound;
  261. }
  262. else {
  263. if ( pass == 0 )
  264. cc->updateSlaveLocation( curop );
  265. int queryOptions = cc->_queryOptions;
  266. if( pass == 0 ) {
  267. StringBuilder& ss = curop.debug().str;
  268. ss << " getMore: " << cc->query.toString() << " ";
  269. }
  270. start = cc->pos;
  271. Cursor *c = cc->c.get();
  272. c->checkLocation();
  273. DiskLoc last;
  274. while ( 1 ) {
  275. if ( !c->ok() ) {
  276. // log() << "TEMP Tailable : " << c->tailable() << ' ' << (queryOptions & QueryOption_AwaitData) << endl;
  277. if ( c->tailable() ) {
  278. /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however
  279. advance() can still be retries as a reactivation attempt. when there is new data, it will
  280. return true. that's what we are doing here.
  281. */
  282. if ( c->advance() )
  283. continue;
  284. if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
  285. throw GetMoreWaitException();
  286. }
  287. break;
  288. }
  289. p.release();
  290. bool ok = ClientCursor::erase(cursorid);
  291. assert(ok);
  292. cursorid = 0;
  293. cc = 0;
  294. break;
  295. }
  296. // in some cases (clone collection) there won't be a matcher
  297. if ( c->matcher() && !c->matcher()->matches(c->currKey(), c->currLoc() ) ) {
  298. }
  299. /*
  300. TODO
  301. else if ( _chunkMatcher && ! _chunkMatcher->belongsToMe( c->currKey(), c->currLoc() ) ){
  302. cout << "TEMP skipping un-owned chunk: " << c->current() << endl;
  303. }
  304. */
  305. else {
  306. if( c->getsetdup(c->currLoc()) ) {
  307. //out() << " but it's a dup \n";
  308. }
  309. else {
  310. last = c->currLoc();
  311. BSONObj js = c->current();
  312. // show disk loc should be part of the main query, not in an $or clause, so this should be ok
  313. fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0));
  314. n++;
  315. if ( (ntoreturn>0 && (n >= ntoreturn || b.len() > MaxBytesToReturnToClientAtOnce)) ||
  316. (ntoreturn==0 && b.len()>1*1024*1024) ) {
  317. c->advance();
  318. cc->pos += n;
  319. break;
  320. }
  321. }
  322. }
  323. c->advance();
  324. }
  325. if ( cc ) {
  326. cc->updateLocation();
  327. cc->mayUpgradeStorage();
  328. cc->storeOpForSlave( last );
  329. exhaust = cc->_queryOptions & QueryOption_Exhaust;
  330. }
  331. }
  332. QueryResult *qr = (QueryResult *) b.buf();
  333. qr->len = b.len();
  334. qr->setOperation(opReply);
  335. qr->_resultFlags() = resultFlags;
  336. qr->cursorId = cursorid;
  337. qr->startingFrom = start;
  338. qr->nReturned = n;
  339. b.decouple();
  340. return qr;
  341. }
  342. class CountOp : public QueryOp {
  343. public:
  344. CountOp( const string& ns , const BSONObj &spec ) :
  345. _ns(ns), count_(),
  346. skip_( spec["skip"].numberLong() ),
  347. limit_( spec["limit"].numberLong() ),
  348. bc_(){
  349. }
  350. virtual void _init() {
  351. c_ = qp().newCursor();
  352. if ( qp().exactKeyMatch() && ! matcher()->needRecord() ) {
  353. query_ = qp().simplifiedQuery( qp().indexKey() );
  354. bc_ = dynamic_cast< BtreeCursor* >( c_.get() );
  355. bc_->forgetEndKey();
  356. }
  357. }
  358. virtual long long nscanned() {
  359. assert( c_.get() );
  360. return c_->nscanned();
  361. }
  362. virtual bool prepareToYield() {
  363. if ( ! _cc ) {
  364. _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , _ns.c_str() ) );
  365. }
  366. return _cc->prepareToYield( _yieldData );
  367. }
  368. virtual void recoverFromYield() {
  369. if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
  370. c_.reset();
  371. _cc.reset();
  372. massert( 13337, "cursor dropped during count", false );
  373. // TODO maybe we want to prevent recording the winning plan as well?
  374. }
  375. }
  376. virtual void next() {
  377. if ( !c_->ok() ) {
  378. setComplete();
  379. return;
  380. }
  381. if ( bc_ ) {
  382. if ( firstMatch_.isEmpty() ) {
  383. firstMatch_ = bc_->currKeyNode().key;
  384. // if not match
  385. if ( query_.woCompare( firstMatch_, BSONObj(), false ) ) {
  386. setComplete();
  387. return;
  388. }
  389. _gotOne();
  390. } else {
  391. if ( !firstMatch_.woEqual( bc_->currKeyNode().key ) ) {
  392. setComplete();
  393. return;
  394. }
  395. _gotOne();
  396. }
  397. }
  398. else {
  399. if ( !matcher()->matches(c_->currKey(), c_->currLoc() ) ) {
  400. }
  401. else if( !c_->getsetdup(c_->currLoc()) ) {
  402. _gotOne();
  403. }
  404. }
  405. c_->advance();
  406. }
  407. virtual QueryOp *_createChild() const {
  408. CountOp *ret = new CountOp( _ns , BSONObj() );
  409. ret->count_ = count_;
  410. ret->skip_ = skip_;
  411. ret->limit_ = limit_;
  412. return ret;
  413. }
  414. long long count() const { return count_; }
  415. virtual bool mayRecordPlan() const { return true; }
  416. private:
  417. void _gotOne(){
  418. if ( skip_ ){
  419. skip_--;
  420. return;
  421. }
  422. if ( limit_ > 0 && count_ >= limit_ ){
  423. setStop();
  424. return;
  425. }
  426. count_++;
  427. }
  428. string _ns;
  429. long long count_;
  430. long long skip_;
  431. long long limit_;
  432. shared_ptr<Cursor> c_;
  433. BSONObj query_;
  434. BtreeCursor *bc_;
  435. BSONObj firstMatch_;
  436. ClientCursor::CleanupPointer _cc;
  437. ClientCursor::YieldData _yieldData;
  438. };
  439. /* { count: "collectionname"[, query: <query>] }
  440. returns -1 on ns does not exist error.
  441. */
  442. long long runCount( const char *ns, const BSONObj &cmd, string &err ) {
  443. Client::Context cx(ns);
  444. NamespaceDetails *d = nsdetails( ns );
  445. if ( !d ) {
  446. err = "ns missing";
  447. return -1;
  448. }
  449. BSONObj query = cmd.getObjectField("query");
  450. // count of all objects
  451. if ( query.isEmpty() ){
  452. return applySkipLimit( d->nrecords , cmd );
  453. }
  454. MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true );
  455. CountOp original( ns , cmd );
  456. shared_ptr< CountOp > res = mps.runOp( original );
  457. if ( !res->complete() ) {
  458. log() << "Count with ns: " << ns << " and query: " << query
  459. << " failed with exception: " << res->exception()
  460. << endl;
  461. return 0;
  462. }
  463. return res->count();
  464. }
  465. class ExplainBuilder {
  466. public:
  467. ExplainBuilder() : _i() {}
  468. void ensureStartScan() {
  469. if ( !_a.get() ) {
  470. _a.reset( new BSONArrayBuilder() );
  471. }
  472. }
  473. void noteCursor( Cursor *c ) {
  474. BSONObjBuilder b( _a->subobjStart() );
  475. b << "cursor" << c->toString() << "indexBounds" << c->prettyIndexBounds();
  476. b.done();
  477. }
  478. void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder, int millis, bool hint ) {
  479. if ( _i == 1 ) {
  480. _c.reset( new BSONArrayBuilder() );
  481. *_c << _b->obj();
  482. }
  483. if ( _i == 0 ) {
  484. _b.reset( new BSONObjBuilder() );
  485. } else {
  486. _b.reset( new BSONObjBuilder( _c->subobjStart() ) );
  487. }
  488. *_b << "cursor" << c->toString();
  489. _b->appendNumber( "nscanned", nscanned );
  490. _b->appendNumber( "nscannedObjects", nscannedObjects );
  491. *_b << "n" << n;
  492. if ( scanAndOrder )
  493. *_b << "scanAndOrder" << true;
  494. *_b << "millis" << millis;
  495. *_b << "indexBounds" << c->prettyIndexBounds();
  496. if ( !hint ) {
  497. *_b << "allPlans" << _a->arr();
  498. }
  499. if ( _i != 0 ) {
  500. _b->done();
  501. }
  502. _a.reset( 0 );
  503. ++_i;
  504. }
  505. BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) {
  506. if ( _i > 1 ) {
  507. BSONObjBuilder b;
  508. b << "clauses" << _c->arr();
  509. b.appendNumber( "nscanned", nscanned );
  510. b.appendNumber( "nscanneObjects", nscannedObjects );
  511. b << "n" << n;
  512. b << "millis" << millis;
  513. b.appendElements( suffix );
  514. return b.obj();
  515. } else {
  516. _b->appendElements( suffix );
  517. return _b->obj();
  518. }
  519. }
  520. private:
  521. auto_ptr< BSONArrayBuilder > _a;
  522. auto_ptr< BSONObjBuilder > _b;
  523. auto_ptr< BSONArrayBuilder > _c;
  524. int _i;
  525. };
  526. // Implements database 'query' requests using the query optimizer's QueryOp interface
  527. class UserQueryOp : public QueryOp {
  528. public:
  529. UserQueryOp( const ParsedQuery& pq, Message &response, ExplainBuilder &eb, CurOp &curop ) :
  530. _buf( 32768 ) , // TODO be smarter here
  531. _pq( pq ) ,
  532. _ntoskip( pq.getSkip() ) ,
  533. _nscanned(0), _oldNscanned(0), _nscannedObjects(0), _oldNscannedObjects(0),
  534. _n(0),
  535. _oldN(0),
  536. _chunkMatcher(shardingState.getChunkMatcher(pq.ns())),
  537. _inMemSort(false),
  538. _saveClientCursor(false),
  539. _wouldSaveClientCursor(false),
  540. _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ),
  541. _response( response ),
  542. _eb( eb ),
  543. _curop( curop )
  544. {}
  545. virtual void _init() {
  546. // only need to put the QueryResult fields there if we're building the first buffer in the message.
  547. if ( _response.empty() ) {
  548. _buf.skip( sizeof( QueryResult ) );
  549. }
  550. if ( _oplogReplay ) {
  551. _findingStartCursor.reset( new FindingStartCursor( qp() ) );
  552. } else {
  553. _c = qp().newCursor( DiskLoc() , _pq.getNumToReturn() + _pq.getSkip() );
  554. }
  555. if ( qp().scanAndOrderRequired() ) {
  556. _inMemSort = true;
  557. _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder() ) );
  558. }
  559. if ( _pq.isExplain() ) {
  560. _eb.noteCursor( _c.get() );
  561. }
  562. }
  563. virtual bool prepareToYield() {
  564. if ( _findingStartCursor.get() ) {
  565. return _findingStartCursor->prepareToYield();
  566. } else {
  567. if ( ! _cc ) {
  568. _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _pq.ns() ) );
  569. }
  570. return _cc->prepareToYield( _yieldData );
  571. }
  572. }
  573. virtual void recoverFromYield() {
  574. if ( _findingStartCursor.get() ) {
  575. _findingStartCursor->recoverFromYield();
  576. } else {
  577. if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
  578. _c.reset();
  579. _cc.reset();
  580. massert( 13338, "cursor dropped during query", false );
  581. // TODO maybe we want to prevent recording the winning plan as well?
  582. }
  583. }
  584. }
  585. virtual long long nscanned() {
  586. if ( _findingStartCursor.get() ) {
  587. return 0; // should only be one query plan, so value doesn't really matter.
  588. }
  589. assert( _c.get() );
  590. return _c->nscanned();
  591. }
  592. virtual void next() {
  593. if ( _findingStartCursor.get() ) {
  594. if ( _findingStartCursor->done() ) {
  595. _c = _findingStartCursor->cRelease();
  596. _findingStartCursor.reset( 0 );
  597. } else {
  598. _findingStartCursor->next();
  599. }
  600. return;
  601. }
  602. if ( !_c->ok() ) {
  603. finish( false );
  604. return;
  605. }
  606. bool mayCreateCursor1 = _pq.wantMore() && ! _inMemSort && _pq.getNumToReturn() != 1 && useCursors;
  607. if( 0 ) {
  608. cout << "SCANNING this: " << this << " key: " << _c->currKey() << " obj: " << _c->current() << endl;
  609. }
  610. if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ){
  611. finish( true ); //?
  612. return;
  613. }
  614. _nscanned = _c->nscanned();
  615. if ( !matcher()->matches(_c->currKey(), _c->currLoc() , &_details ) ) {
  616. // not a match, continue onward
  617. if ( _details.loadedObject )
  618. _nscannedObjects++;
  619. }
  620. else {
  621. _nscannedObjects++;
  622. DiskLoc cl = _c->currLoc();
  623. if ( _chunkMatcher && ! _chunkMatcher->belongsToMe( _c->currKey(), _c->currLoc() ) ){
  624. // cout << "TEMP skipping un-owned chunk: " << _c->current() << endl;
  625. }
  626. else if( _c->getsetdup(cl) ) {
  627. // dup
  628. }
  629. else {
  630. // got a match.
  631. if ( _inMemSort ) {
  632. // note: no cursors for non-indexed, ordered results. results must be fairly small.
  633. _so->add( _pq.returnKey() ? _c->currKey() : _c->current(), _pq.showDiskLoc() ? &cl : 0 );
  634. }
  635. else if ( _ntoskip > 0 ) {
  636. _ntoskip--;
  637. }
  638. else {
  639. if ( _pq.isExplain() ) {
  640. _n++;
  641. if ( n() >= _pq.getNumToReturn() && !_pq.wantMore() ) {
  642. // .limit() was used, show just that much.
  643. finish( true ); //?
  644. return;
  645. }
  646. }
  647. else {
  648. if ( _pq.returnKey() ){
  649. BSONObjBuilder bb( _buf );
  650. bb.appendKeys( _c->indexKeyPattern() , _c->currKey() );
  651. bb.done();
  652. }
  653. else {
  654. BSONObj js = _c->current();
  655. assert( js.isValid() );
  656. if ( _oplogReplay ){
  657. BSONElement e = js["ts"];
  658. if ( e.type() == Date || e.type() == Timestamp )
  659. _slaveReadTill = e._opTime();
  660. }
  661. fillQueryResultFromObj( _buf , _pq.getFields() , js , (_pq.showDiskLoc() ? &cl : 0));
  662. }
  663. _n++;
  664. if ( ! _c->supportGetMore() ){
  665. if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ){
  666. finish( true );
  667. return;
  668. }
  669. }
  670. else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ){
  671. /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */
  672. if ( mayCreateCursor1 ) {
  673. _wouldSaveClientCursor = true;
  674. if ( _c->advance() ) {
  675. // more...so save a cursor
  676. _saveClientCursor = true;
  677. }
  678. }
  679. finish( true );
  680. return;
  681. }
  682. }
  683. }
  684. }
  685. }
  686. _c->advance();
  687. }
  688. // this plan won, so set data for response broadly
  689. void finish( bool stop ) {
  690. if ( _pq.isExplain() ) {
  691. _n = _inMemSort ? _so->size() : _n;
  692. }
  693. else if ( _inMemSort ) {
  694. _so->fill( _buf, _pq.getFields() , _n );
  695. }
  696. if ( _pq.hasOption( QueryOption_CursorTailable ) && _pq.getNumToReturn() != 1 )
  697. _c->setTailable();
  698. // If the tailing request succeeded.
  699. if ( _c->tailable() )
  700. _saveClientCursor = true;
  701. if ( _pq.isExplain()) {
  702. _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(), _curop.elapsedMillis(), useHints && !_pq.getHint().eoo() );
  703. } else {
  704. _response.appendData( _buf.buf(), _buf.len() );
  705. _buf.decouple();
  706. }
  707. if ( stop ) {
  708. setStop();
  709. } else {
  710. setComplete();
  711. }
  712. }
  713. void finishExplain( const BSONObj &suffix ) {
  714. BSONObj obj = _eb.finishWithSuffix( totalNscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix);
  715. fillQueryResultFromObj(_buf, 0, obj);
  716. _n = 1;
  717. _oldN = 0;
  718. _response.appendData( _buf.buf(), _buf.len() );
  719. _buf.decouple();
  720. }
  721. virtual bool mayRecordPlan() const { return _pq.getNumToReturn() != 1; }
  722. virtual QueryOp *_createChild() const {
  723. if ( _pq.isExplain() ) {
  724. _eb.ensureStartScan();
  725. }
  726. UserQueryOp *ret = new UserQueryOp( _pq, _response, _eb, _curop );
  727. ret->_oldN = n();
  728. ret->_oldNscanned = totalNscanned();
  729. ret->_oldNscannedObjects = nscannedObjects();
  730. ret->_ntoskip = _ntoskip;
  731. return ret;
  732. }
  733. bool scanAndOrderRequired() const { return _inMemSort; }
  734. shared_ptr<Cursor> cursor() { return _c; }
  735. int n() const { return _oldN + _n; }
  736. long long totalNscanned() const { return _nscanned + _oldNscanned; }
  737. long long nscannedObjects() const { return _nscannedObjects + _oldNscannedObjects; }
  738. bool saveClientCursor() const { return _saveClientCursor; }
  739. bool wouldSaveClientCursor() const { return _wouldSaveClientCursor; }
  740. void finishForOplogReplay( ClientCursor * cc ){
  741. if ( _oplogReplay && ! _slaveReadTill.isNull() )
  742. cc->_slaveReadTill = _slaveReadTill;
  743. }
  744. private:
  745. BufBuilder _buf;
  746. const ParsedQuery& _pq;
  747. long long _ntoskip;
  748. long long _nscanned;
  749. long long _oldNscanned;
  750. long long _nscannedObjects;
  751. long long _oldNscannedObjects;
  752. int _n; // found so far
  753. int _oldN;
  754. MatchDetails _details;
  755. ChunkMatcherPtr _chunkMatcher;
  756. bool _inMemSort;
  757. auto_ptr< ScanAndOrder > _so;
  758. shared_ptr<Cursor> _c;
  759. ClientCursor::CleanupPointer _cc;
  760. ClientCursor::YieldData _yieldData;
  761. bool _saveClientCursor;
  762. bool _wouldSaveClientCursor;
  763. bool _oplogReplay;
  764. auto_ptr< FindingStartCursor > _findingStartCursor;
  765. Message &_response;
  766. ExplainBuilder &_eb;
  767. CurOp &_curop;
  768. OpTime _slaveReadTill;
  769. };
  770. /* run a query -- includes checking for and running a Command */
  771. const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
  772. StringBuilder& ss = curop.debug().str;
  773. shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
  774. ParsedQuery& pq( *pq_shared );
  775. int ntoskip = q.ntoskip;
  776. BSONObj jsobj = q.query;
  777. int queryOptions = q.queryOptions;
  778. const char *ns = q.ns;
  779. if( logLevel >= 2 )
  780. log() << "query: " << ns << jsobj << endl;
  781. ss << ns;
  782. {
  783. // only say ntoreturn if nonzero.
  784. int n = pq.getNumToReturn();
  785. if( n )
  786. ss << " ntoreturn:" << n;
  787. }
  788. curop.setQuery(jsobj);
  789. if ( pq.couldBeCommand() ) {
  790. BufBuilder bb;
  791. bb.skip(sizeof(QueryResult));
  792. BSONObjBuilder cmdResBuf;
  793. if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
  794. ss << " command: " << jsobj.toString();
  795. curop.markCommand();
  796. auto_ptr< QueryResult > qr;
  797. qr.reset( (QueryResult *) bb.buf() );
  798. bb.decouple();
  799. qr->setResultFlagsToOk();
  800. qr->len = bb.len();
  801. ss << " reslen:" << bb.len();
  802. qr->setOperation(opReply);
  803. qr->cursorId = 0;
  804. qr->startingFrom = 0;
  805. qr->nReturned = 1;
  806. result.setData( qr.release(), true );
  807. }
  808. return false;
  809. }
  810. /* --- regular query --- */
  811. int n = 0;
  812. BSONElement hint = useHints ? pq.getHint() : BSONElement();
  813. bool explain = pq.isExplain();
  814. bool snapshot = pq.isSnapshot();
  815. BSONObj order = pq.getOrder();
  816. BSONObj query = pq.getFilter();
  817. /* The ElemIter will not be happy if this isn't really an object. So throw exception
  818. here when that is true.
  819. (Which may indicate bad data from client.)
  820. */
  821. if ( query.objsize() == 0 ) {
  822. out() << "Bad query object?\n jsobj:";
  823. out() << jsobj.toString() << "\n query:";
  824. out() << query.toString() << endl;
  825. uassert( 10110 , "bad query object", false);
  826. }
  827. /* --- read lock --- */
  828. mongolock lk(false);
  829. Client::Context ctx( ns , dbpath , &lk );
  830. replVerifyReadsOk(pq);
  831. if ( pq.hasOption( QueryOption_CursorTailable ) ) {
  832. NamespaceDetails *d = nsdetails( ns );
  833. uassert( 13051, "tailable cursor requested on non capped collection", d && d->capped );
  834. const BSONObj nat1 = BSON( "$natural" << 1 );
  835. if ( order.isEmpty() ) {
  836. order = nat1;
  837. } else {
  838. uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
  839. }
  840. }
  841. BSONObj snapshotHint; // put here to keep the data in scope
  842. if( snapshot ) {
  843. NamespaceDetails *d = nsdetails(ns);
  844. if ( d ){
  845. int i = d->findIdIndex();
  846. if( i < 0 ) {
  847. if ( strstr( ns , ".system." ) == 0 )
  848. log() << "warning: no _id index on $snapshot query, ns:" << ns << endl;
  849. }
  850. else {
  851. /* [dm] the name of an _id index tends to vary, so we build the hint the hard way here.
  852. probably need a better way to specify "use the _id index" as a hint. if someone is
  853. in the query optimizer please fix this then!
  854. */
  855. BSONObjBuilder b;
  856. b.append("$hint", d->idx(i).indexName());
  857. snapshotHint = b.obj();
  858. hint = snapshotHint.firstElement();
  859. }
  860. }
  861. }
  862. if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
  863. bool nsFound = false;
  864. bool indexFound = false;
  865. BSONObj resObject;
  866. Client& c = cc();
  867. bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound );
  868. if ( nsFound == false || indexFound == true ){
  869. BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32);
  870. bb.skip(sizeof(QueryResult));
  871. ss << " idhack ";
  872. if ( found ){
  873. n = 1;
  874. fillQueryResultFromObj( bb , pq.getFields() , resObject );
  875. }
  876. auto_ptr< QueryResult > qr;
  877. qr.reset( (QueryResult *) bb.buf() );
  878. bb.decouple();
  879. qr->setResultFlagsToOk();
  880. qr->len = bb.len();
  881. ss << " reslen:" << bb.len();
  882. qr->setOperation(opReply);
  883. qr->cursorId = 0;
  884. qr->startingFrom = 0;
  885. qr->nReturned = n;
  886. result.setData( qr.release(), true );
  887. return false;
  888. }
  889. }
  890. // regular, not QO bypass query
  891. BSONObj oldPlan;
  892. if ( explain && ! pq.hasIndexSpecifier() ){
  893. MultiPlanScanner mps( ns, query, order );
  894. if ( mps.usingPrerecordedPlan() )
  895. oldPlan = mps.oldExplain();
  896. }
  897. auto_ptr< MultiPlanScanner > mps( new MultiPlanScanner( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax(), false, true ) );
  898. BSONObj explainSuffix;
  899. if ( explain ) {
  900. BSONObjBuilder bb;
  901. if ( !oldPlan.isEmpty() )
  902. bb.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() );
  903. explainSuffix = bb.obj();
  904. }
  905. ExplainBuilder eb;
  906. UserQueryOp original( pq, result, eb, curop );
  907. shared_ptr< UserQueryOp > o = mps->runOp( original );
  908. UserQueryOp &dqo = *o;
  909. if ( ! dqo.complete() )
  910. throw MsgAssertionException( dqo.exception() );
  911. if ( explain ) {
  912. dqo.finishExplain( explainSuffix );
  913. }
  914. n = dqo.n();
  915. long long nscanned = dqo.totalNscanned();
  916. if ( dqo.scanAndOrderRequired() )
  917. ss << " scanAndOrder ";
  918. shared_ptr<Cursor> cursor = dqo.cursor();
  919. if( logLevel >= 5 )
  920. log() << " used cursor: " << cursor.get() << endl;
  921. long long cursorid = 0;
  922. const char * exhaust = 0;
  923. if ( dqo.saveClientCursor() || ( dqo.wouldSaveClientCursor() && mps->mayRunMore() ) ) {
  924. ClientCursor *cc;
  925. bool moreClauses = mps->mayRunMore();
  926. if ( moreClauses ) {
  927. // this MultiCursor will use a dumb NoOp to advance(), so no need to specify mayYield
  928. shared_ptr< Cursor > multi( new MultiCursor( mps, cursor, dqo.matcher(), dqo ) );
  929. cc = new ClientCursor(queryOptions, multi, ns);
  930. } else {
  931. cursor->setMatcher( dqo.matcher() );
  932. cc = new ClientCursor( queryOptions, cursor, ns );
  933. }
  934. cursorid = cc->cursorid;
  935. cc->query = jsobj.getOwned();
  936. DEV tlog() << "query has more, cursorid: " << cursorid << endl;
  937. cc->pos = n;
  938. cc->pq = pq_shared;
  939. cc->fields = pq.getFieldPtr();
  940. cc->originalMessage = m;
  941. cc->updateLocation();
  942. if ( !cc->c->ok() && cc->c->tailable() )
  943. DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
  944. if( queryOptions & QueryOption_Exhaust ) {
  945. exhaust = ns;
  946. ss << " exhaust ";
  947. }
  948. dqo.finishForOplogReplay(cc);
  949. }
  950. QueryResult *qr = (QueryResult *) result.header();
  951. qr->cursorId = cursorid;
  952. qr->setResultFlagsToOk();
  953. // qr->len is updated automatically by appendData()
  954. ss << " reslen:" << qr->len;
  955. qr->setOperation(opReply);
  956. qr->startingFrom = 0;
  957. qr->nReturned = n;
  958. int duration = curop.elapsedMillis();
  959. bool dbprofile = curop.shouldDBProfile( duration );
  960. if ( dbprofile || duration >= cmdLine.slowMS ) {
  961. ss << " nscanned:" << nscanned << ' ';
  962. if ( ntoskip )
  963. ss << " ntoskip:" << ntoskip;
  964. if ( dbprofile )
  965. ss << " \nquery: ";
  966. ss << jsobj.toString() << ' ';
  967. }
  968. ss << " nreturned:" << n;
  969. return exhaust;
  970. }
  971. } // namespace mongo