PageRenderTime 67ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/mongodb-1.6.3/db/query.cpp

https://bitbucket.org/wesc/debian-mongodb
C++ | 1091 lines | 867 code | 134 blank | 90 comment | 214 complexity | b7ae4be4fefb1c513b891c2e97786d2a MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause
  1. // query.cpp
  2. /**
  3. * Copyright (C) 2008 10gen Inc.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License, version 3,
  7. * as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU Affero General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Affero General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "pch.h"
  18. #include "query.h"
  19. #include "pdfile.h"
  20. #include "jsobjmanipulator.h"
  21. #include "../bson/util/builder.h"
  22. #include <time.h>
  23. #include "introspect.h"
  24. #include "btree.h"
  25. #include "../util/lruishmap.h"
  26. #include "json.h"
  27. #include "repl.h"
  28. #include "replpair.h"
  29. #include "scanandorder.h"
  30. #include "security.h"
  31. #include "curop.h"
  32. #include "commands.h"
  33. #include "queryoptimizer.h"
  34. #include "lasterror.h"
  35. #include "../s/d_logic.h"
  36. #include "repl_block.h"
  37. namespace mongo {
  38. /* We cut off further objects once we cross this threshold; thus, you might get
  39. a little bit more than this, it is a threshold rather than a limit.
  40. Checked against the reply buffer length in processGetMore() and UserQueryOp::next().
  40. */
  41. const int MaxBytesToReturnToClientAtOnce = 4 * 1024 * 1024;
  42. //ns->query->DiskLoc
  43. // LRUishMap<BSONObj,DiskLoc,5> lrutest(123);
  // Flags defined in another translation unit.
  // useCursors is read by UserQueryOp::next() when deciding whether a client cursor may be saved.
  44. extern bool useCursors;
  // useHints gates whether a client-supplied hint is honoured (see runQuery() and UserQueryOp::finish()).
  45. extern bool useHints;
  46. // Just try to identify best plan.
  47. class DeleteOp : public MultiCursor::CursorOp {
  48. public:
  49. DeleteOp( bool justOne, int& bestCount ) :
  50. justOne_( justOne ),
  51. count_(),
  52. bestCount_( bestCount ),
  53. _nscanned() {
  54. }
  55. virtual void _init() {
  56. c_ = qp().newCursor();
  57. }
  58. virtual bool prepareToYield() {
  59. if ( ! _cc ) {
  60. _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , qp().ns() ) );
  61. }
  62. return _cc->prepareToYield( _yieldData );
  63. }
  64. virtual void recoverFromYield() {
  65. if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
  66. _cc.reset();
  67. c_.reset();
  68. massert( 13340, "cursor dropped during delete", false );
  69. }
  70. }
  71. virtual void next() {
  72. if ( !c_->ok() ) {
  73. setComplete();
  74. return;
  75. }
  76. DiskLoc rloc = c_->currLoc();
  77. if ( matcher()->matches(c_->currKey(), rloc ) ) {
  78. if ( !c_->getsetdup(rloc) )
  79. ++count_;
  80. }
  81. c_->advance();
  82. ++_nscanned;
  83. if ( count_ > bestCount_ )
  84. bestCount_ = count_;
  85. if ( count_ > 0 ) {
  86. if ( justOne_ )
  87. setComplete();
  88. else if ( _nscanned >= 100 && count_ == bestCount_ )
  89. setComplete();
  90. }
  91. }
  92. virtual bool mayRecordPlan() const { return !justOne_; }
  93. virtual QueryOp *_createChild() const {
  94. bestCount_ = 0; // should be safe to reset this in contexts where createChild() is called
  95. return new DeleteOp( justOne_, bestCount_ );
  96. }
  97. virtual shared_ptr<Cursor> newCursor() const { return qp().newCursor(); }
  98. private:
  99. bool justOne_;
  100. int count_;
  101. int &bestCount_;
  102. long long _nscanned;
  103. shared_ptr<Cursor> c_;
  104. ClientCursor::CleanupPointer _cc;
  105. ClientCursor::YieldData _yieldData;
  106. };
  107. /* ns: namespace, e.g. <database>.<collection>
  108. pattern: the "where" clause / criteria
  109. justOne: stop after 1 match
  110. god: allow access to system namespaces, and don't yield
  111. */
  // logop: when true, each deleted document that has an _id is logged via logOp("d", ...).
  // rs: optional RemoveSaver; when non-null, goingToDelete() is called with each document before removal.
  // Returns the number of records deleted (0 when the namespace does not exist).
  // uasserts on a disallowed system namespace (12050), a '$' in the namespace (10100)
  // and on capped collections (10101).
  112. long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) {
  113. if( !god ) {
  114. if ( strstr(ns, ".system.") ) {
  115. /* note a delete from system.indexes would corrupt the db
  116. if done here, as there are pointers into those objects in
  117. NamespaceDetails.
  118. */
  119. uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) );
  120. }
  121. if ( strchr( ns , '$' ) ){
  // Log the offending namespace first; the uassert below repeats the same test and therefore always fires here.
  122. log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
  123. uassert( 10100 , "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 );
  124. }
  125. }
  126. NamespaceDetails *d = nsdetails( ns );
  127. if ( ! d )
  128. return 0;
  129. uassert( 10101 , "can't remove from a capped collection" , ! d->capped );
  130. long long nDeleted = 0;
  // 'best' is the shared match counter that DeleteOp instances update by reference.
  131. int best = 0;
  132. shared_ptr< MultiCursor::CursorOp > opPtr( new DeleteOp( justOneOrig, best ) );
  133. shared_ptr< MultiCursor > creal( new MultiCursor( ns, pattern, BSONObj(), opPtr, true ) );
  134. if( !creal->ok() )
  135. return nDeleted;
  136. shared_ptr< Cursor > cPtr = creal;
  // Wrap the cursor in a ClientCursor and remember its id so we can check at the end whether it is still registered.
  137. auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) );
  138. cc->setDoingDeletes( true );
  139. CursorId id = cc->cursorid;
  140. bool justOne = justOneOrig;
  // Never yield in god mode or when the matcher reports the query as atomic.
  141. bool canYield = !god && !creal->matcher()->docMatcher().atomic();
  142. do {
  143. if ( canYield && ! cc->yieldSometimes() ){
  144. cc.release(); // has already been deleted elsewhere
  145. // TODO should we assert or something?
  146. break;
  147. }
  148. if ( !cc->c->ok() ) {
  149. break; // if we yielded, could have hit the end
  150. }
  151. // this way we can avoid calling updateLocation() every time (expensive)
  152. // as well as some other nuances handled
  153. cc->setDoingDeletes( true );
  154. DiskLoc rloc = cc->c->currLoc();
  155. BSONObj key = cc->c->currKey();
  156. // NOTE Calling advance() may change the matcher, so it's important
  157. // to try to match first.
  158. bool match = creal->matcher()->matches( key , rloc );
  // If advance() fails there is nothing further to scan, so treat this iteration as the last one.
  159. if ( ! cc->c->advance() )
  160. justOne = true;
  161. if ( ! match )
  162. continue;
  163. assert( !cc->c->getsetdup(rloc) ); // can't be a dup, we deleted it!
  164. if ( !justOne ) {
  165. /* NOTE: this is SLOW. this is not good, noteLocation() was designed to be called across getMore
  166. blocks. here we might call millions of times which would be bad.
  167. */
  168. cc->c->noteLocation();
  169. }
  170. if ( logop ) {
  171. BSONElement e;
  // Only the _id is logged; replJustOne is passed to logOp as true for every logged delete.
  172. if( BSONObj( rloc.rec() ).getObjectID( e ) ) {
  173. BSONObjBuilder b;
  174. b.append( e );
  175. bool replJustOne = true;
  176. logOp( "d", ns, b.done(), 0, &replJustOne );
  177. } else {
  178. problem() << "deleted object without id, not logging" << endl;
  179. }
  180. }
  181. if ( rs )
  182. rs->goingToDelete( rloc.obj() /*cc->c->current()*/ );
  183. theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
  184. nDeleted++;
  185. if ( justOne ) {
  186. break;
  187. }
  // Counterpart of the noteLocation() call above, made after the record has been removed.
  188. cc->c->checkLocation();
  189. } while ( cc->c->ok() );
  // If the cursor id is no longer registered, give up ownership so the auto_ptr does not delete it
  // (presumably it was already destroyed elsewhere, as in the yield case above).
  190. if ( cc.get() && ClientCursor::find( id , false ) == 0 ){
  191. cc.release();
  192. }
  193. return nDeleted;
  194. }
  // Global trace level; not referenced in the visible part of this file (presumably read elsewhere).
  195. int otherTraceLevel = 0;
  // Forward declaration only; the definition is not in the visible part of this file.
  196. int initialExtentSize(int len);
  197. bool runCommands(const char *ns, BSONObj& jsobj, CurOp& curop, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) {
  198. try {
  199. return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions);
  200. }
  201. catch ( AssertionException& e ) {
  202. e.getInfo().append( anObjBuilder , "assertion" , "assertionCode" );
  203. }
  204. curop.debug().str << " assertion ";
  205. anObjBuilder.append("errmsg", "db assertion failure");
  206. anObjBuilder.append("ok", 0.0);
  207. BSONObj x = anObjBuilder.done();
  208. b.appendBuf((void*) x.objdata(), x.objsize());
  209. return true;
  210. }
  // Global counter; not referenced in the visible part of this file (presumably updated elsewhere).
  211. int nCaught = 0;
  212. void killCursors(int n, long long *ids) {
  213. int k = 0;
  214. for ( int i = 0; i < n; i++ ) {
  215. if ( ClientCursor::erase(ids[i]) )
  216. k++;
  217. }
  218. if ( logLevel > 0 || k != n ){
  219. log( k == n ) << "killcursors: found " << k << " of " << n << endl;
  220. }
  221. }
  // Shared constant objects parsed once at static-initialization time: {_id: 1} and {}.
  222. BSONObj id_obj = fromjson("{\"_id\":1}");
  223. BSONObj empty_obj = fromjson("{}");
  224. //int dump = 0;
  225. /* empty result for error conditions */
  226. QueryResult* emptyMoreResult(long long cursorid) {
  227. BufBuilder b(32768);
  228. b.skip(sizeof(QueryResult));
  229. QueryResult *qr = (QueryResult *) b.buf();
  230. qr->cursorId = 0; // 0 indicates no more data to retrieve.
  231. qr->startingFrom = 0;
  232. qr->len = b.len();
  233. qr->setOperation(opReply);
  234. qr->nReturned = 0;
  235. b.decouple();
  236. return qr;
  237. }
  // Builds the opReply for a getMore request on an existing client cursor.
  //   ns        - namespace; only used for the "cursorid not found" log line here.
  //   ntoreturn - max documents for this batch; 0 means "until roughly 1MB has been buffered".
  //   cursorid  - id of the cursor to continue; reported back as 0 when the cursor is missing or exhausted.
  //   curop     - current operation; receives the debug string and is passed to updateSlaveLocation() on pass 0.
  //   pass      - retry counter; one-time work is done only when it is 0, and awaiting stops once it reaches 1000.
  //   exhaust   - out: set from the cursor's QueryOption_Exhaust bit (false if the cursor is gone).
  // Returns the reply buffer, decoupled from the local BufBuilder (so the caller presumably owns it).
  // Throws GetMoreWaitException when a tailable AwaitData cursor has produced nothing yet and pass < 1000.
  238. QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) {
  239. // log() << "TEMP GETMORE " << ns << ' ' << cursorid << ' ' << pass << endl;
  240. exhaust = false;
  241. ClientCursor::Pointer p(cursorid);
  242. ClientCursor *cc = p._c;
  // Size the reply buffer up front: tiny when the cursor is missing, otherwise header + 4MB (limited) or 1MB (unlimited).
  243. int bufSize = 512;
  244. if ( cc ){
  245. bufSize += sizeof( QueryResult );
  246. bufSize += ( ntoreturn ? 4 : 1 ) * 1024 * 1024;
  247. }
  248. BufBuilder b( bufSize );
  249. b.skip(sizeof(QueryResult));
  250. int resultFlags = ResultFlag_AwaitCapable;
  251. int start = 0;
  252. int n = 0;
  253. if ( !cc ) {
  254. log() << "getMore: cursorid not found " << ns << " " << cursorid << endl;
  255. cursorid = 0;
  256. resultFlags = ResultFlag_CursorNotFound;
  257. }
  258. else {
  259. if ( pass == 0 )
  260. cc->updateSlaveLocation( curop );
  261. int queryOptions = cc->_queryOptions;
  262. if( pass == 0 ) {
  263. StringBuilder& ss = curop.debug().str;
  264. ss << " getMore: " << cc->query.toString() << " ";
  265. }
  266. start = cc->pos;
  267. Cursor *c = cc->c.get();
  268. c->checkLocation();
  // Location of the last document placed in the reply; passed to storeOpForSlave() after the loop.
  269. DiskLoc last;
  270. while ( 1 ) {
  271. if ( !c->ok() ) {
  272. // log() << "TEMP Tailable : " << c->tailable() << ' ' << (queryOptions & QueryOption_AwaitData) << endl;
  273. if ( c->tailable() ) {
  274. /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however
  275. advance() can still be retries as a reactivation attempt. when there is new data, it will
  276. return true. that's what we are doing here.
  277. */
  278. if ( c->advance() )
  279. continue;
  280. if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
  281. throw GetMoreWaitException();
  282. }
  283. break;
  284. }
  // Non-tailable cursor is exhausted: drop our reference, erase it, and report cursor id 0 to the client.
  285. p.release();
  286. bool ok = ClientCursor::erase(cursorid);
  287. assert(ok);
  288. cursorid = 0;
  289. cc = 0;
  290. break;
  291. }
  292. // in some cases (clone collection) there won't be a matcher
  // Empty body on purpose: documents that fail the matcher are skipped.
  293. if ( c->matcher() && !c->matcher()->matches(c->currKey(), c->currLoc() ) ) {
  294. }
  295. /*
  296. TODO
  297. else if ( _chunkMatcher && ! _chunkMatcher->belongsToMe( c->currKey(), c->currLoc() ) ){
  298. cout << "TEMP skipping un-owned chunk: " << c->current() << endl;
  299. }
  300. */
  301. else {
  302. if( c->getsetdup(c->currLoc()) ) {
  303. //out() << " but it's a dup \n";
  304. }
  305. else {
  306. last = c->currLoc();
  307. BSONObj js = c->current();
  308. // show disk loc should be part of the main query, not in an $or clause, so this should be ok
  309. fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0));
  310. n++;
  // Batch is full (count or byte threshold): step past the document just returned,
  // record the new position and stop. Note cc->pos is only updated on this path.
  311. if ( (ntoreturn>0 && (n >= ntoreturn || b.len() > MaxBytesToReturnToClientAtOnce)) ||
  312. (ntoreturn==0 && b.len()>1*1024*1024) ) {
  313. c->advance();
  314. cc->pos += n;
  315. break;
  316. }
  317. }
  318. }
  319. c->advance();
  320. }
  // cc is null here if the cursor was erased above.
  321. if ( cc ) {
  322. cc->updateLocation();
  323. cc->mayUpgradeStorage();
  324. cc->storeOpForSlave( last );
  325. exhaust = cc->_queryOptions & QueryOption_Exhaust;
  326. }
  327. }
  // Fill in the reply header that was reserved with b.skip() at the top.
  328. QueryResult *qr = (QueryResult *) b.buf();
  329. qr->len = b.len();
  330. qr->setOperation(opReply);
  331. qr->_resultFlags() = resultFlags;
  332. qr->cursorId = cursorid;
  333. qr->startingFrom = start;
  334. qr->nReturned = n;
  335. b.decouple();
  336. return qr;
  337. }
  338. class CountOp : public QueryOp {
  339. public:
  340. CountOp( const string& ns , const BSONObj &spec ) :
  341. _ns(ns), count_(),
  342. skip_( spec["skip"].numberLong() ),
  343. limit_( spec["limit"].numberLong() ),
  344. bc_(){
  345. }
  346. virtual void _init() {
  347. c_ = qp().newCursor();
  348. if ( qp().exactKeyMatch() && ! matcher()->needRecord() ) {
  349. query_ = qp().simplifiedQuery( qp().indexKey() );
  350. bc_ = dynamic_cast< BtreeCursor* >( c_.get() );
  351. bc_->forgetEndKey();
  352. }
  353. }
  354. virtual bool prepareToYield() {
  355. if ( ! _cc ) {
  356. _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , _ns.c_str() ) );
  357. }
  358. return _cc->prepareToYield( _yieldData );
  359. }
  360. virtual void recoverFromYield() {
  361. if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
  362. c_.reset();
  363. _cc.reset();
  364. massert( 13337, "cursor dropped during count", false );
  365. // TODO maybe we want to prevent recording the winning plan as well?
  366. }
  367. }
  368. virtual void next() {
  369. if ( !c_->ok() ) {
  370. setComplete();
  371. return;
  372. }
  373. if ( bc_ ) {
  374. if ( firstMatch_.isEmpty() ) {
  375. firstMatch_ = bc_->currKeyNode().key;
  376. // if not match
  377. if ( query_.woCompare( firstMatch_, BSONObj(), false ) ) {
  378. setComplete();
  379. return;
  380. }
  381. _gotOne();
  382. } else {
  383. if ( !firstMatch_.woEqual( bc_->currKeyNode().key ) ) {
  384. setComplete();
  385. return;
  386. }
  387. _gotOne();
  388. }
  389. }
  390. else {
  391. if ( !matcher()->matches(c_->currKey(), c_->currLoc() ) ) {
  392. }
  393. else if( !c_->getsetdup(c_->currLoc()) ) {
  394. _gotOne();
  395. }
  396. }
  397. c_->advance();
  398. }
  399. virtual QueryOp *_createChild() const {
  400. CountOp *ret = new CountOp( _ns , BSONObj() );
  401. ret->count_ = count_;
  402. ret->skip_ = skip_;
  403. ret->limit_ = limit_;
  404. return ret;
  405. }
  406. long long count() const { return count_; }
  407. virtual bool mayRecordPlan() const { return true; }
  408. private:
  409. void _gotOne(){
  410. if ( skip_ ){
  411. skip_--;
  412. return;
  413. }
  414. if ( limit_ > 0 && count_ >= limit_ ){
  415. setStop();
  416. return;
  417. }
  418. count_++;
  419. }
  420. string _ns;
  421. long long count_;
  422. long long skip_;
  423. long long limit_;
  424. shared_ptr<Cursor> c_;
  425. BSONObj query_;
  426. BtreeCursor *bc_;
  427. BSONObj firstMatch_;
  428. ClientCursor::CleanupPointer _cc;
  429. ClientCursor::YieldData _yieldData;
  430. };
  431. /* { count: "collectionname"[, query: <query>] }
  432. returns -1 on ns does not exist error.
  433. */
  434. long long runCount( const char *ns, const BSONObj &cmd, string &err ) {
  435. Client::Context cx(ns);
  436. NamespaceDetails *d = nsdetails( ns );
  437. if ( !d ) {
  438. err = "ns missing";
  439. return -1;
  440. }
  441. BSONObj query = cmd.getObjectField("query");
  442. // count of all objects
  443. if ( query.isEmpty() ){
  444. return applySkipLimit( d->nrecords , cmd );
  445. }
  446. MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true );
  447. CountOp original( ns , cmd );
  448. shared_ptr< CountOp > res = mps.runOp( original );
  449. if ( !res->complete() ) {
  450. log() << "Count with ns: " << ns << " and query: " << query
  451. << " failed with exception: " << res->exception()
  452. << endl;
  453. return 0;
  454. }
  455. return res->count();
  456. }
  457. class ExplainBuilder {
  458. public:
  459. ExplainBuilder() : _i() {}
  460. void ensureStartScan() {
  461. if ( !_a.get() ) {
  462. _a.reset( new BSONArrayBuilder() );
  463. }
  464. }
  465. void noteCursor( Cursor *c ) {
  466. BSONObjBuilder b( _a->subobjStart() );
  467. b << "cursor" << c->toString() << "indexBounds" << c->prettyIndexBounds();
  468. b.done();
  469. }
  470. void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder, int millis, bool hint ) {
  471. if ( _i == 1 ) {
  472. _c.reset( new BSONArrayBuilder() );
  473. *_c << _b->obj();
  474. }
  475. if ( _i == 0 ) {
  476. _b.reset( new BSONObjBuilder() );
  477. } else {
  478. _b.reset( new BSONObjBuilder( _c->subobjStart() ) );
  479. }
  480. *_b << "cursor" << c->toString();
  481. _b->appendNumber( "nscanned", nscanned );
  482. _b->appendNumber( "nscannedObjects", nscannedObjects );
  483. *_b << "n" << n;
  484. if ( scanAndOrder )
  485. *_b << "scanAndOrder" << true;
  486. *_b << "millis" << millis;
  487. *_b << "indexBounds" << c->prettyIndexBounds();
  488. if ( !hint ) {
  489. *_b << "allPlans" << _a->arr();
  490. }
  491. if ( _i != 0 ) {
  492. _b->done();
  493. }
  494. _a.reset( 0 );
  495. ++_i;
  496. }
  497. BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) {
  498. if ( _i > 1 ) {
  499. BSONObjBuilder b;
  500. b << "clauses" << _c->arr();
  501. b.appendNumber( "nscanned", nscanned );
  502. b.appendNumber( "nscanneObjects", nscannedObjects );
  503. b << "n" << n;
  504. b << "millis" << millis;
  505. b.appendElements( suffix );
  506. return b.obj();
  507. } else {
  508. _b->appendElements( suffix );
  509. return _b->obj();
  510. }
  511. }
  512. private:
  513. auto_ptr< BSONArrayBuilder > _a;
  514. auto_ptr< BSONObjBuilder > _b;
  515. auto_ptr< BSONArrayBuilder > _c;
  516. int _i;
  517. };
  518. // Implements database 'query' requests using the query optimizer's QueryOp interface
  // One instance runs per candidate plan. next() processes one cursor position per call and writes
  // matching documents into _buf; finish() hands _buf to the shared response Message.
  // The _old* counters carry totals from the op this one was created from (see _createChild()),
  // so n()/nscanned()/nscannedObjects() report cumulative values.
  519. class UserQueryOp : public QueryOp {
  520. public:
  521. UserQueryOp( const ParsedQuery& pq, Message &response, ExplainBuilder &eb, CurOp &curop ) :
  522. _buf( 32768 ) , // TODO be smarter here
  523. _pq( pq ) ,
  524. _ntoskip( pq.getSkip() ) ,
  525. _nscanned(0), _oldNscanned(0), _nscannedObjects(0), _oldNscannedObjects(0),
  526. _n(0),
  527. _oldN(0),
  528. _chunkMatcher(shardingState.getChunkMatcher(pq.ns())),
  529. _inMemSort(false),
  530. _saveClientCursor(false),
  531. _wouldSaveClientCursor(false),
  532. _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ),
  533. _response( response ),
  534. _eb( eb ),
  535. _curop( curop )
  536. {}
  // Prepares the buffer, the cursor (or the FindingStartCursor for oplog replay) and the optional in-memory sorter.
  537. virtual void _init() {
  538. // only need to put the QueryResult fields there if we're building the first buffer in the message.
  539. if ( _response.empty() ) {
  540. _buf.skip( sizeof( QueryResult ) );
  541. }
  542. if ( _oplogReplay ) {
  543. _findingStartCursor.reset( new FindingStartCursor( qp() ) );
  544. } else {
  545. _c = qp().newCursor( DiskLoc() , _pq.getNumToReturn() + _pq.getSkip() );
  546. }
  547. if ( qp().scanAndOrderRequired() ) {
  548. _inMemSort = true;
  549. _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder() ) );
  550. }
  // NOTE(review): on the oplog-replay path above _c has not been assigned yet, so an explain of an
  // OplogReplay query would pass a null cursor to noteCursor() -- confirm whether that combination can occur.
  551. if ( _pq.isExplain() ) {
  552. _eb.noteCursor( _c.get() );
  553. }
  554. }
  // Yield support: delegate to the FindingStartCursor while it is active, otherwise lazily create a ClientCursor.
  555. virtual bool prepareToYield() {
  556. if ( _findingStartCursor.get() ) {
  557. return _findingStartCursor->prepareToYield();
  558. } else {
  559. if ( ! _cc ) {
  560. _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _pq.ns() ) );
  561. }
  562. return _cc->prepareToYield( _yieldData );
  563. }
  564. }
  // After a yield: if recovery fails, drop cursor, ClientCursor and sorter, then raise massert 13338.
  565. virtual void recoverFromYield() {
  566. if ( _findingStartCursor.get() ) {
  567. _findingStartCursor->recoverFromYield();
  568. } else {
  569. if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
  570. _c.reset();
  571. _cc.reset();
  572. _so.reset();
  573. massert( 13338, "cursor dropped during query", false );
  574. // TODO maybe we want to prevent recording the winning plan as well?
  575. }
  576. }
  577. }
  // Processes the document at the current cursor position (or one step of the start-finding phase).
  578. virtual void next() {
  // Oplog replay: keep stepping the FindingStartCursor; once it is done, take over its cursor.
  579. if ( _findingStartCursor.get() ) {
  580. if ( _findingStartCursor->done() ) {
  581. _c = _findingStartCursor->cRelease();
  582. _findingStartCursor.reset( 0 );
  583. } else {
  584. _findingStartCursor->next();
  585. }
  586. return;
  587. }
  588. if ( !_c->ok() ) {
  589. finish( false );
  590. return;
  591. }
  // A client cursor may only be kept when the client wants more, no in-memory sort is needed,
  // more than one document was requested and cursors are globally enabled.
  592. bool mayCreateCursor1 = _pq.wantMore() && ! _inMemSort && _pq.getNumToReturn() != 1 && useCursors;
  593. if( 0 ) {
  594. cout << "SCANNING this: " << this << " key: " << _c->currKey() << " obj: " << _c->current() << endl;
  595. }
  596. if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ){
  597. finish( true ); //?
  598. return;
  599. }
  600. _nscanned++;
  601. if ( !matcher()->matches(_c->currKey(), _c->currLoc() , &_details ) ) {
  602. // not a match, continue onward
  603. if ( _details.loadedObject )
  604. _nscannedObjects++;
  605. }
  606. else {
  607. _nscannedObjects++;
  608. DiskLoc cl = _c->currLoc();
  609. if ( _chunkMatcher && ! _chunkMatcher->belongsToMe( _c->currKey(), _c->currLoc() ) ){
  610. // cout << "TEMP skipping un-owned chunk: " << _c->current() << endl;
  611. }
  612. else if( _c->getsetdup(cl) ) {
  613. // dup
  614. }
  615. else {
  616. // got a match.
  617. if ( _inMemSort ) {
  618. // note: no cursors for non-indexed, ordered results. results must be fairly small.
  619. _so->add( _pq.returnKey() ? _c->currKey() : _c->current(), _pq.showDiskLoc() ? &cl : 0 );
  620. }
  621. else if ( _ntoskip > 0 ) {
  622. _ntoskip--;
  623. }
  624. else {
  // Explain only counts matches; nothing is written to _buf on this path.
  625. if ( _pq.isExplain() ) {
  626. _n++;
  627. if ( n() >= _pq.getNumToReturn() && !_pq.wantMore() ) {
  628. // .limit() was used, show just that much.
  629. finish( true ); //?
  630. return;
  631. }
  632. }
  633. else {
  634. if ( _pq.returnKey() ){
  635. BSONObjBuilder bb( _buf );
  636. bb.appendKeys( _c->indexKeyPattern() , _c->currKey() );
  637. bb.done();
  638. }
  639. else {
  640. BSONObj js = _c->current();
  641. assert( js.isValid() );
  // For oplog replay, remember the "ts" of the last document returned (see finishForOplogReplay()).
  642. if ( _oplogReplay ){
  643. BSONElement e = js["ts"];
  644. if ( e.type() == Date || e.type() == Timestamp )
  645. _slaveReadTill = e._opTime();
  646. }
  647. fillQueryResultFromObj( _buf , _pq.getFields() , js , (_pq.showDiskLoc() ? &cl : 0));
  648. }
  649. _n++;
  // Cursors that cannot serve getMore must return everything now, up to the limit or byte threshold.
  650. if ( ! _c->supportGetMore() ){
  651. if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ){
  652. finish( true );
  653. return;
  654. }
  655. }
  656. else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ){
  657. /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */
  658. if ( mayCreateCursor1 ) {
  659. _wouldSaveClientCursor = true;
  660. if ( _c->advance() ) {
  661. // more...so save a cursor
  662. _saveClientCursor = true;
  663. }
  664. }
  665. finish( true );
  666. return;
  667. }
  668. }
  669. }
  670. }
  671. }
  672. _c->advance();
  673. }
  674. // this plan won, so set data for response broadly
  // stop == true ends the op via setStop(); stop == false marks it complete via setComplete().
  675. void finish( bool stop ) {
  676. if ( _pq.isExplain() ) {
  677. _n = _inMemSort ? _so->size() : _n;
  678. }
  679. else if ( _inMemSort ) {
  680. if( _so.get() )
  681. _so->fill( _buf, _pq.getFields() , _n );
  682. }
  683. if ( _pq.hasOption( QueryOption_CursorTailable ) && _pq.getNumToReturn() != 1 )
  684. _c->setTailable();
  685. // If the tailing request succeeded.
  686. if ( _c->tailable() )
  687. _saveClientCursor = true;
  688. if ( _pq.isExplain()) {
  689. _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(), _curop.elapsedMillis(), useHints && !_pq.getHint().eoo() );
  690. } else {
  // Copy the buffered documents into the response, then detach the buffer from _buf.
  691. _response.appendData( _buf.buf(), _buf.len() );
  692. _buf.decouple();
  693. }
  694. if ( stop ) {
  695. setStop();
  696. } else {
  697. setComplete();
  698. }
  699. }
  // Replaces the result with a single explain document built by the ExplainBuilder (n becomes 1).
  700. void finishExplain( const BSONObj &suffix ) {
  701. BSONObj obj = _eb.finishWithSuffix( nscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix);
  702. fillQueryResultFromObj(_buf, 0, obj);
  703. _n = 1;
  704. _oldN = 0;
  705. _response.appendData( _buf.buf(), _buf.len() );
  706. _buf.decouple();
  707. }
  708. virtual bool mayRecordPlan() const { return _pq.getNumToReturn() != 1; }
  // The child op continues where this one stopped: it inherits the cumulative counters and the remaining skip.
  709. virtual QueryOp *_createChild() const {
  710. if ( _pq.isExplain() ) {
  711. _eb.ensureStartScan();
  712. }
  713. UserQueryOp *ret = new UserQueryOp( _pq, _response, _eb, _curop );
  714. ret->_oldN = n();
  715. ret->_oldNscanned = nscanned();
  716. ret->_oldNscannedObjects = nscannedObjects();
  717. ret->_ntoskip = _ntoskip;
  718. return ret;
  719. }
  720. bool scanAndOrderRequired() const { return _inMemSort; }
  721. shared_ptr<Cursor> cursor() { return _c; }
  // Cumulative totals: this op's own counters plus those inherited from the op it was created from.
  722. int n() const { return _oldN + _n; }
  723. long long nscanned() const { return _nscanned + _oldNscanned; }
  724. long long nscannedObjects() const { return _nscannedObjects + _oldNscannedObjects; }
  725. bool saveClientCursor() const { return _saveClientCursor; }
  726. bool wouldSaveClientCursor() const { return _wouldSaveClientCursor; }
  // Copies the last "ts" seen during oplog replay onto the saved client cursor, if one was recorded.
  727. void finishForOplogReplay( ClientCursor * cc ){
  728. if ( _oplogReplay && ! _slaveReadTill.isNull() )
  729. cc->_slaveReadTill = _slaveReadTill;
  730. }
  731. private:
  732. BufBuilder _buf;
  733. const ParsedQuery& _pq;
  734. long long _ntoskip;
  735. long long _nscanned;
  736. long long _oldNscanned;
  737. long long _nscannedObjects;
  738. long long _oldNscannedObjects;
  739. int _n; // found so far
  740. int _oldN;
  741. MatchDetails _details;
  742. ChunkMatcherPtr _chunkMatcher;
  743. bool _inMemSort;
  744. auto_ptr< ScanAndOrder > _so;
  745. shared_ptr<Cursor> _c;
  746. ClientCursor::CleanupPointer _cc;
  747. ClientCursor::YieldData _yieldData;
  // _saveClientCursor: a cursor must be kept for getMore (more data was found, or the cursor is tailable).
  // _wouldSaveClientCursor: the first batch filled up and a cursor was permitted, whether or not more data followed.
  748. bool _saveClientCursor;
  749. bool _wouldSaveClientCursor;
  750. bool _oplogReplay;
  751. auto_ptr< FindingStartCursor > _findingStartCursor;
  752. Message &_response;
  753. ExplainBuilder &_eb;
  754. CurOp &_curop;
  755. OpTime _slaveReadTill;
  756. };
  757. /* run a query -- includes checking for and running a Command */
  758. const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
  759. StringBuilder& ss = curop.debug().str;
  760. shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
  761. ParsedQuery& pq( *pq_shared );
  762. int ntoskip = q.ntoskip;
  763. BSONObj jsobj = q.query;
  764. int queryOptions = q.queryOptions;
  765. const char *ns = q.ns;
  766. if( logLevel >= 2 )
  767. log() << "query: " << ns << jsobj << endl;
  768. ss << ns;
  769. {
  770. // only say ntoreturn if nonzero.
  771. int n = pq.getNumToReturn();
  772. if( n )
  773. ss << " ntoreturn:" << n;
  774. }
  775. curop.setQuery(jsobj);
  776. if ( pq.couldBeCommand() ) {
  777. BufBuilder bb;
  778. bb.skip(sizeof(QueryResult));
  779. BSONObjBuilder cmdResBuf;
  780. if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
  781. ss << " command: " << jsobj.toString();
  782. curop.markCommand();
  783. auto_ptr< QueryResult > qr;
  784. qr.reset( (QueryResult *) bb.buf() );
  785. bb.decouple();
  786. qr->setResultFlagsToOk();
  787. qr->len = bb.len();
  788. ss << " reslen:" << bb.len();
  789. qr->setOperation(opReply);
  790. qr->cursorId = 0;
  791. qr->startingFrom = 0;
  792. qr->nReturned = 1;
  793. result.setData( qr.release(), true );
  794. }
  795. return false;
  796. }
  797. /* --- regular query --- */
  798. int n = 0;
  799. BSONElement hint = useHints ? pq.getHint() : BSONElement();
  800. bool explain = pq.isExplain();
  801. bool snapshot = pq.isSnapshot();
  802. BSONObj order = pq.getOrder();
  803. BSONObj query = pq.getFilter();
  804. /* The ElemIter will not be happy if this isn't really an object. So throw exception
  805. here when that is true.
  806. (Which may indicate bad data from client.)
  807. */
  808. if ( query.objsize() == 0 ) {
  809. out() << "Bad query object?\n jsobj:";
  810. out() << jsobj.toString() << "\n query:";
  811. out() << query.toString() << endl;
  812. uassert( 10110 , "bad query object", false);
  813. }
  814. /* --- read lock --- */
  815. mongolock lk(false);
  816. Client::Context ctx( ns , dbpath , &lk );
  817. replVerifyReadsOk(pq);
  818. if ( pq.hasOption( QueryOption_CursorTailable ) ) {
  819. NamespaceDetails *d = nsdetails( ns );
  820. uassert( 13051, "tailable cursor requested on non capped collection", d && d->capped );
  821. const BSONObj nat1 = BSON( "$natural" << 1 );
  822. if ( order.isEmpty() ) {
  823. order = nat1;
  824. } else {
  825. uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
  826. }
  827. }
  828. BSONObj snapshotHint; // put here to keep the data in scope
  829. if( snapshot ) {
  830. NamespaceDetails *d = nsdetails(ns);
  831. if ( d ){
  832. int i = d->findIdIndex();
  833. if( i < 0 ) {
  834. if ( strstr( ns , ".system." ) == 0 )
  835. log() << "warning: no _id index on $snapshot query, ns:" << ns << endl;
  836. }
  837. else {
  838. /* [dm] the name of an _id index tends to vary, so we build the hint the hard way here.
  839. probably need a better way to specify "use the _id index" as a hint. if someone is
  840. in the query optimizer please fix this then!
  841. */
  842. BSONObjBuilder b;
  843. b.append("$hint", d->idx(i).indexName());
  844. snapshotHint = b.obj();
  845. hint = snapshotHint.firstElement();
  846. }
  847. }
  848. }
  849. if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
  850. bool nsFound = false;
  851. bool indexFound = false;
  852. BSONObj resObject;
  853. Client& c = cc();
  854. bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound );
  855. if ( nsFound == false || indexFound == true ){
  856. BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32);
  857. bb.skip(sizeof(QueryResult));
  858. ss << " idhack ";
  859. if ( found ){
  860. n = 1;
  861. fillQueryResultFromObj( bb , pq.getFields() , resObject );
  862. }
  863. auto_ptr< QueryResult > qr;
  864. qr.reset( (QueryResult *) bb.buf() );
  865. bb.decouple();
  866. qr->setResultFlagsToOk();
  867. qr->len = bb.len();
  868. ss << " reslen:" << bb.len();
  869. qr->setOperation(opReply);
  870. qr->cursorId = 0;
  871. qr->startingFrom = 0;
  872. qr->nReturned = n;
  873. result.setData( qr.release(), true );
  874. return false;
  875. }
  876. }
  877. // regular, not QO bypass query
  878. BSONObj oldPlan;
  879. if ( explain && ! pq.hasIndexSpecifier() ){
  880. MultiPlanScanner mps( ns, query, order );
  881. if ( mps.usingPrerecordedPlan() )
  882. oldPlan = mps.oldExplain();
  883. }
  884. auto_ptr< MultiPlanScanner > mps( new MultiPlanScanner( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax(), false, true ) );
  885. BSONObj explainSuffix;
  886. if ( explain ) {
  887. BSONObjBuilder bb;
  888. if ( !oldPlan.isEmpty() )
  889. bb.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() );
  890. explainSuffix = bb.obj();
  891. }
  892. ExplainBuilder eb;
  893. UserQueryOp original( pq, result, eb, curop );
  894. shared_ptr< UserQueryOp > o = mps->runOp( original );
  895. UserQueryOp &dqo = *o;
  896. if ( ! dqo.complete() )
  897. throw MsgAssertionException( dqo.exception() );
  898. if ( explain ) {
  899. dqo.finishExplain( explainSuffix );
  900. }
  901. n = dqo.n();
  902. long long nscanned = dqo.nscanned();
  903. if ( dqo.scanAndOrderRequired() )
  904. ss << " scanAndOrder ";
  905. shared_ptr<Cursor> cursor = dqo.cursor();
  906. if( logLevel >= 5 )
  907. log() << " used cursor: " << cursor.get() << endl;
  908. long long cursorid = 0;
  909. const char * exhaust = 0;
  910. if ( dqo.saveClientCursor() || ( dqo.wouldSaveClientCursor() && mps->mayRunMore() ) ) {
  911. ClientCursor *cc;
  912. bool moreClauses = mps->mayRunMore();
  913. if ( moreClauses ) {
  914. // this MultiCursor will use a dumb NoOp to advance(), so no need to specify mayYield
  915. shared_ptr< Cursor > multi( new MultiCursor( mps, cursor, dqo.matcher(), dqo ) );
  916. cc = new ClientCursor(queryOptions, multi, ns, jsobj.getOwned());
  917. } else {
  918. cursor->setMatcher( dqo.matcher() );
  919. cc = new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned() );
  920. }
  921. cursorid = cc->cursorid;
  922. DEV tlog(2) << "query has more, cursorid: " << cursorid << endl;
  923. cc->pos = n;
  924. cc->pq = pq_shared;
  925. cc->fields = pq.getFieldPtr();
  926. cc->originalMessage = m;
  927. cc->updateLocation();
  928. if ( !cc->c->ok() && cc->c->tailable() )
  929. DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
  930. if( queryOptions & QueryOption_Exhaust ) {
  931. exhaust = ns;
  932. ss << " exhaust ";
  933. }
  934. dqo.finishForOplogReplay(cc);
  935. }
  936. QueryResult *qr = (QueryResult *) result.header();
  937. qr->cursorId = cursorid;
  938. qr->setResultFlagsToOk();
  939. // qr->len is updated automatically by appendData()
  940. ss << " reslen:" << qr->len;
  941. qr->setOperation(opReply);
  942. qr->startingFrom = 0;
  943. qr->nReturned = n;
  944. int duration = curop.elapsedMillis();
  945. bool dbprofile = curop.shouldDBProfile( duration );
  946. if ( dbprofile || duration >= cmdLine.slowMS ) {
  947. ss << " nscanned:" << nscanned << ' ';
  948. if ( ntoskip )
  949. ss << " ntoskip:" << ntoskip;
  950. if ( dbprofile )
  951. ss << " \nquery: ";
  952. ss << jsobj.toString() << ' ';
  953. }
  954. ss << " nreturned:" << n;
  955. return exhaust;
  956. }
  957. } // namespace mongo