PageRenderTime 58ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/mongodb-src-r2.0.4/db/ops/query.cpp

#
C++ | 1041 lines | 829 code | 135 blank | 77 comment | 216 complexity | 6523e7ea1aa67dc597a3bec368b62d49 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. // query.cpp
  2. /**
  3. * Copyright (C) 2008 10gen Inc.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License, version 3,
  7. * as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU Affero General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Affero General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "pch.h"
  18. #include "query.h"
  19. #include "../pdfile.h"
  20. #include "../jsobjmanipulator.h"
  21. #include "../../bson/util/builder.h"
  22. #include <time.h>
  23. #include "../introspect.h"
  24. #include "../btree.h"
  25. #include "../../util/lruishmap.h"
  26. #include "../json.h"
  27. #include "../repl.h"
  28. #include "../replutil.h"
  29. #include "../scanandorder.h"
  30. #include "../security.h"
  31. #include "../curop-inl.h"
  32. #include "../commands.h"
  33. #include "../queryoptimizer.h"
  34. #include "../lasterror.h"
  35. #include "../../s/d_logic.h"
  36. #include "../repl_block.h"
  37. #include "../../server.h"
  38. namespace mongo {
  39. /* We cut off further objects once we cross this threshold; thus, you might get
  40. a little bit more than this, it is a threshold rather than a limit.
  41. */
  // Byte threshold (4 MiB) compared against the reply buffer length in this file to end a batch.
  42. const int MaxBytesToReturnToClientAtOnce = 4 * 1024 * 1024;
  43. //ns->query->DiskLoc
  44. // LRUishMap<BSONObj,DiskLoc,5> lrutest(123);
  // Flags defined in another translation unit. In this file useCursors is one of the conditions
  // for saving a client cursor after the first batch, and useHints decides whether the hint
  // carried by the parsed query is used at all.
  45. extern bool useCursors;
  46. extern bool useHints;
  47. bool runCommands(const char *ns, BSONObj& jsobj, CurOp& curop, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) {
  48. try {
  49. return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions);
  50. }
  51. catch ( AssertionException& e ) {
  52. e.getInfo().append( anObjBuilder , "assertion" , "assertionCode" );
  53. curop.debug().exceptionInfo = e.getInfo();
  54. }
  55. anObjBuilder.append("errmsg", "db assertion failure");
  56. anObjBuilder.append("ok", 0.0);
  57. BSONObj x = anObjBuilder.done();
  58. b.appendBuf((void*) x.objdata(), x.objsize());
  59. return true;
  60. }
  // Namespace-scope BSON constants built from JSON text: {"_id":1} and the empty object.
  61. BSONObj id_obj = fromjson("{\"_id\":1}");
  62. BSONObj empty_obj = fromjson("{}");
  63. //int dump = 0;
  64. /* empty result for error conditions */
  65. QueryResult* emptyMoreResult(long long cursorid) {
  66. BufBuilder b(32768);
  67. b.skip(sizeof(QueryResult));
  68. QueryResult *qr = (QueryResult *) b.buf();
  69. qr->cursorId = 0; // 0 indicates no more data to retrieve.
  70. qr->startingFrom = 0;
  71. qr->len = b.len();
  72. qr->setOperation(opReply);
  73. qr->initializeResultFlags();
  74. qr->nReturned = 0;
  75. b.decouple();
  76. return qr;
  77. }
  // Produces the next batch of results for an already-open cursor.
  //   ns        - namespace supplied by the client; must equal the cursor's own ns (uassert 14833).
  //   ntoreturn - document-count cap for the batch; 0 disables the count cap so that only the
  //               MaxBytesToReturnToClientAtOnce threshold ends the batch.
  //   cursorid  - id of the cursor to continue; the reply carries 0 when the cursor is missing
  //               or has been erased here.
  //   curop     - current operation; debug().query is filled in from the cursor.
  //   pass      - attempt counter from the caller; the slave location is updated only on pass 0
  //               and the "no data yet" early return is taken only while pass < 1000.
  //   exhaust   - out: false unless the cursor survives and has QueryOption_Exhaust set.
  // Returns the reply (buffer decoupled from the local builder), or 0 when a tailable cursor with
  // QueryOption_AwaitData has produced nothing in this call.
  78. QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) {
  79. exhaust = false;
  80. ClientCursor::Pointer p(cursorid);
  81. ClientCursor *cc = p.c();
  82. int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;
  83. BufBuilder b( bufSize );
  // leave room for the reply header, which is filled in at the end of the function
  84. b.skip(sizeof(QueryResult));
  85. int resultFlags = ResultFlag_AwaitCapable;
  86. int start = 0;
  87. int n = 0;
  88. if ( unlikely(!cc) ) {
  // unknown cursor: reply with zero documents and the CursorNotFound flag
  89. log() << "getMore: cursorid not found " << ns << " " << cursorid << endl;
  90. cursorid = 0;
  91. resultFlags = ResultFlag_CursorNotFound;
  92. }
  93. else {
  94. // check for spoofing of the ns such that it does not match the one originally there for the cursor
  95. uassert(14833, "auth error", str::equals(ns, cc->ns().c_str()));
  96. if ( pass == 0 )
  97. cc->updateSlaveLocation( curop );
  98. int queryOptions = cc->queryOptions();
  99. curop.debug().query = cc->query();
  100. start = cc->pos();
  101. Cursor *c = cc->c();
  102. c->checkLocation();
  103. DiskLoc last;
  // keyFieldsOnly is set only when checkKey() yields a projection for this index key pattern;
  // when set, results are hydrated from the index key instead of the stored document
  104. scoped_ptr<Projection::KeyOnly> keyFieldsOnly;
  105. if ( cc->modifiedKeys() == false && cc->isMultiKey() == false && cc->fields )
  106. keyFieldsOnly.reset( cc->fields->checkKey( cc->indexKeyPattern() ) );
  107. // This manager may be stale, but it's the state of chunking when the cursor was created.
  108. ShardChunkManagerPtr manager = cc->getChunkManager();
  109. while ( 1 ) {
  110. if ( !c->ok() ) {
  111. if ( c->tailable() ) {
  112. /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however
  113. advance() can still be retried as a reactivation attempt. when there is new data, it will
  114. return true. that's what we are doing here.
  115. */
  116. if ( c->advance() )
  117. continue;
  // nothing new for an await-data cursor: return 0 instead of a reply
  118. if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
  119. return 0;
  120. }
  121. break;
  122. }
  // non-tailable cursor is exhausted: erase it and report cursor id 0
  123. p.release();
  124. bool ok = ClientCursor::erase(cursorid);
  125. assert(ok);
  126. cursorid = 0;
  127. cc = 0;
  128. break;
  129. }
  130. // in some cases (clone collection) there won't be a matcher
  131. if ( c->matcher() && !c->matcher()->matchesCurrent( c ) ) {
  // not a match: nothing is emitted, fall through to the advance below
  132. }
  133. else if ( manager && ! manager->belongsToMe( cc ) ){
  134. LOG(2) << "cursor skipping document in un-owned chunk: " << c->current() << endl;
  135. }
  136. else {
  137. if( c->getsetdup(c->currLoc()) ) {
  138. //out() << " but it's a dup \n";
  139. }
  140. else {
  141. last = c->currLoc();
  142. n++;
  143. if ( keyFieldsOnly ) {
  144. fillQueryResultFromObj(b, 0, keyFieldsOnly->hydrate( c->currKey() ) );
  145. }
  146. else {
  147. BSONObj js = c->current();
  148. // show disk loc should be part of the main query, not in an $or clause, so this should be ok
  149. fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0));
  150. }
  // batch is full (count cap or byte threshold): step past the emitted document,
  // record how many were returned, and stop
  151. if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) {
  152. c->advance();
  153. cc->incPos( n );
  154. break;
  155. }
  156. }
  157. }
  158. c->advance();
  // a false return from yieldSometimes() is handled by erasing the cursor and
  // reporting cursor id 0
  159. if ( ! cc->yieldSometimes( ClientCursor::MaybeCovered ) ) {
  160. ClientCursor::erase(cursorid);
  161. cursorid = 0;
  162. cc = 0;
  163. p.deleted();
  164. break;
  165. }
  166. }
  // cursor still exists: persist its position and derive the exhaust flag from its options
  167. if ( cc ) {
  168. cc->updateLocation();
  169. cc->mayUpgradeStorage();
  170. cc->storeOpForSlave( last );
  171. exhaust = cc->queryOptions() & QueryOption_Exhaust;
  172. }
  173. }
  // fill in the reply header that was skipped at the front of the buffer
  174. QueryResult *qr = (QueryResult *) b.buf();
  175. qr->len = b.len();
  176. qr->setOperation(opReply);
  177. qr->_resultFlags() = resultFlags;
  178. qr->cursorId = cursorid;
  179. qr->startingFrom = start;
  180. qr->nReturned = n;
  181. b.decouple();
  182. return qr;
  183. }
  184. class CountOp : public QueryOp {
  185. public:
  186. CountOp( const string& ns , const BSONObj &spec ) :
  187. _ns(ns), _capped(false), _count(), _myCount(),
  188. _skip( spec["skip"].numberLong() ),
  189. _limit( spec["limit"].numberLong() ),
  190. _nscanned(),
  191. _bc(),
  192. _yieldRecoveryFailed() {
  193. }
  194. virtual void _init() {
  195. _c = qp().newCursor();
  196. _capped = _c->capped();
  197. if ( qp().exactKeyMatch() && ! matcher( _c )->needRecord() ) {
  198. _query = qp().simplifiedQuery( qp().indexKey() );
  199. _bc = dynamic_cast< BtreeCursor* >( _c.get() );
  200. _bc->forgetEndKey();
  201. }
  202. }
  203. virtual long long nscanned() {
  204. return _c.get() ? _c->nscanned() : _nscanned;
  205. }
  206. virtual bool prepareToYield() {
  207. if ( _c && !_cc ) {
  208. _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _ns.c_str() ) );
  209. }
  210. if ( _cc ) {
  211. return _cc->prepareToYield( _yieldData );
  212. }
  213. // no active cursor - ok to yield
  214. return true;
  215. }
  216. virtual void recoverFromYield() {
  217. if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) {
  218. _yieldRecoveryFailed = true;
  219. _c.reset();
  220. _cc.reset();
  221. if ( _capped ) {
  222. msgassertedNoTrace( 13337, str::stream() << "capped cursor overrun during count: " << _ns );
  223. }
  224. else if ( qp().mustAssertOnYieldFailure() ) {
  225. msgassertedNoTrace( 15891, str::stream() << "CountOp::recoverFromYield() failed to recover: " << _ns );
  226. }
  227. else {
  228. // we don't fail query since we're fine with returning partial data if collection dropped
  229. }
  230. }
  231. }
  232. virtual void next() {
  233. if ( ! _c || !_c->ok() ) {
  234. setComplete();
  235. return;
  236. }
  237. _nscanned = _c->nscanned();
  238. if ( _bc ) {
  239. if ( _firstMatch.isEmpty() ) {
  240. _firstMatch = _bc->currKey().getOwned();
  241. // if not match
  242. if ( _query.woCompare( _firstMatch, BSONObj(), false ) ) {
  243. setComplete();
  244. return;
  245. }
  246. _gotOne();
  247. }
  248. else {
  249. if ( ! _firstMatch.equal( _bc->currKey() ) ) {
  250. setComplete();
  251. return;
  252. }
  253. _gotOne();
  254. }
  255. }
  256. else {
  257. if ( !matcher( _c )->matchesCurrent( _c.get() ) ) {
  258. }
  259. else if( !_c->getsetdup(_c->currLoc()) ) {
  260. _gotOne();
  261. }
  262. }
  263. _c->advance();
  264. }
  265. virtual QueryOp *_createChild() const {
  266. CountOp *ret = new CountOp( _ns , BSONObj() );
  267. ret->_count = _count;
  268. ret->_skip = _skip;
  269. ret->_limit = _limit;
  270. return ret;
  271. }
  272. long long count() const { return _count; }
  273. virtual bool mayRecordPlan() const {
  274. return !_yieldRecoveryFailed && ( ( _myCount > _limit / 2 ) || ( complete() && !stopRequested() ) );
  275. }
  276. private:
  277. void _gotOne() {
  278. if ( _skip ) {
  279. _skip--;
  280. return;
  281. }
  282. if ( _limit > 0 && _count >= _limit ) {
  283. setStop();
  284. return;
  285. }
  286. _count++;
  287. _myCount++;
  288. }
  289. string _ns;
  290. bool _capped;
  291. long long _count;
  292. long long _myCount;
  293. long long _skip;
  294. long long _limit;
  295. long long _nscanned;
  296. shared_ptr<Cursor> _c;
  297. BSONObj _query;
  298. BtreeCursor * _bc;
  299. BSONObj _firstMatch;
  300. ClientCursor::CleanupPointer _cc;
  301. ClientCursor::YieldData _yieldData;
  302. bool _yieldRecoveryFailed;
  303. };
  304. /* { count: "collectionname"[, query: <query>] }
  305. returns -1 on ns does not exist error.
  306. */
  307. long long runCount( const char *ns, const BSONObj &cmd, string &err ) {
  308. Client::Context cx(ns);
  309. NamespaceDetails *d = nsdetails( ns );
  310. if ( !d ) {
  311. err = "ns missing";
  312. return -1;
  313. }
  314. BSONObj query = cmd.getObjectField("query");
  315. // count of all objects
  316. if ( query.isEmpty() ) {
  317. return applySkipLimit( d->stats.nrecords , cmd );
  318. }
  319. MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true );
  320. CountOp original( ns , cmd );
  321. shared_ptr< CountOp > res = mps.runOp( original );
  322. if ( !res->complete() ) {
  323. log() << "Count with ns: " << ns << " and query: " << query
  324. << " failed with exception: " << res->exception()
  325. << endl;
  326. return 0;
  327. }
  328. return res->count();
  329. }
  330. class ExplainBuilder {
  331. // Note: by default we filter out allPlans and oldPlan in the shell's
  332. // explain() function. If you add any recursive structures, make sure to
  333. // edit the JS to make sure everything gets filtered.
  334. public:
  335. ExplainBuilder() : _i() {}
  336. void ensureStartScan() {
  337. if ( !_a.get() ) {
  338. _a.reset( new BSONArrayBuilder() );
  339. }
  340. }
  341. void noteCursor( Cursor *c ) {
  342. BSONObjBuilder b( _a->subobjStart() );
  343. b << "cursor" << c->toString() << "indexBounds" << c->prettyIndexBounds();
  344. b.done();
  345. }
  346. void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder,
  347. int millis, bool hint, int nYields , int nChunkSkips , bool indexOnly ) {
  348. if ( _i == 1 ) {
  349. _c.reset( new BSONArrayBuilder() );
  350. *_c << _b->obj();
  351. }
  352. if ( _i == 0 ) {
  353. _b.reset( new BSONObjBuilder() );
  354. }
  355. else {
  356. _b.reset( new BSONObjBuilder( _c->subobjStart() ) );
  357. }
  358. *_b << "cursor" << c->toString();
  359. _b->appendNumber( "nscanned", nscanned );
  360. _b->appendNumber( "nscannedObjects", nscannedObjects );
  361. *_b << "n" << n;
  362. if ( scanAndOrder )
  363. *_b << "scanAndOrder" << true;
  364. *_b << "millis" << millis;
  365. *_b << "nYields" << nYields;
  366. *_b << "nChunkSkips" << nChunkSkips;
  367. *_b << "isMultiKey" << c->isMultiKey();
  368. *_b << "indexOnly" << indexOnly;
  369. *_b << "indexBounds" << c->prettyIndexBounds();
  370. c->explainDetails( *_b );
  371. if ( !hint ) {
  372. *_b << "allPlans" << _a->arr();
  373. }
  374. if ( _i != 0 ) {
  375. _b->done();
  376. }
  377. _a.reset( 0 );
  378. ++_i;
  379. }
  380. BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) {
  381. if ( _i > 1 ) {
  382. BSONObjBuilder b;
  383. b << "clauses" << _c->arr();
  384. b.appendNumber( "nscanned", nscanned );
  385. b.appendNumber( "nscannedObjects", nscannedObjects );
  386. b << "n" << n;
  387. b << "millis" << millis;
  388. b.appendElements( suffix );
  389. return b.obj();
  390. }
  391. else {
  392. _b->appendElements( suffix );
  393. return _b->obj();
  394. }
  395. }
  396. private:
  397. auto_ptr< BSONArrayBuilder > _a;
  398. auto_ptr< BSONObjBuilder > _b;
  399. auto_ptr< BSONArrayBuilder > _c;
  400. int _i;
  401. };
  402. // Implements database 'query' requests using the query optimizer's QueryOp interface
  // One instance runs one candidate plan. Matching documents are written into _buf, which
  // finish() appends to the shared response message; explain runs report through _eb instead.
  // _oldN / _oldNscanned / _oldNscannedObjects carry totals over from the op this one was
  // created from (see _createChild()).
  403. class UserQueryOp : public QueryOp {
  404. public:
  405. UserQueryOp( const ParsedQuery& pq, Message &response, ExplainBuilder &eb, CurOp &curop ) :
  406. _buf( 32768 ) , // TODO be smarter here
  407. _pq( pq ) ,
  408. _ntoskip( pq.getSkip() ) ,
  409. _nscanned(0), _oldNscanned(0), _nscannedObjects(0), _oldNscannedObjects(0),
  410. _n(0),
  411. _oldN(0),
  412. _nYields(),
  413. _nChunkSkips(),
  414. _chunkManager( shardingState.needShardChunkManager(pq.ns()) ?
  415. shardingState.getShardChunkManager(pq.ns()) : ShardChunkManagerPtr() ),
  416. _inMemSort(false),
  417. _capped(false),
  418. _saveClientCursor(false),
  419. _wouldSaveClientCursor(false),
  420. _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ),
  421. _response( response ),
  422. _eb( eb ),
  423. _curop( curop ),
  424. _yieldRecoveryFailed()
  425. {}
  // Sets up the cursor (or the FindingStartCursor for oplog replay), the optional key-only
  // projection, the optional in-memory sort, and registers the cursor with the explain builder.
  426. virtual void _init() {
  427. // only need to put the QueryResult fields there if we're building the first buffer in the message.
  428. if ( _response.empty() ) {
  429. _buf.skip( sizeof( QueryResult ) );
  430. }
  431. if ( _oplogReplay ) {
  432. _findingStartCursor.reset( new FindingStartCursor( qp() ) );
  433. _capped = true;
  434. }
  435. else {
  436. _c = qp().newCursor( DiskLoc() , _pq.getNumToReturn() + _pq.getSkip() );
  437. _capped = _c->capped();
  438. // setup check for if we can only use index to extract
  439. if ( _c->modifiedKeys() == false && _c->isMultiKey() == false && _pq.getFields() ) {
  440. _keyFieldsOnly.reset( _pq.getFields()->checkKey( _c->indexKeyPattern() ) );
  441. }
  442. }
  443. if ( qp().scanAndOrderRequired() ) {
  444. _inMemSort = true;
  445. _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder(), qp().multikeyFrs() ) );
  446. }
  // NOTE(review): on the oplog-replay branch above _c has not been assigned, so a null
  // pointer is passed to noteCursor() here -- confirm explain + OplogReplay cannot reach this.
  447. if ( _pq.isExplain() ) {
  448. _eb.noteCursor( _c.get() );
  449. }
  450. }
  // Delegates to the FindingStartCursor while one is active; otherwise wraps _c in a
  // ClientCursor on first use and asks it to prepare for the yield.
  451. virtual bool prepareToYield() {
  452. if ( _findingStartCursor.get() ) {
  453. return _findingStartCursor->prepareToYield();
  454. }
  455. else {
  456. if ( _c && !_cc ) {
  457. _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _pq.ns() ) );
  458. }
  459. if ( _cc ) {
  460. return _cc->prepareToYield( _yieldData );
  461. }
  462. }
  463. // no active cursor - ok to yield
  464. return true;
  465. }
  // Counts the yield, then restores cursor state. If recovery fails, the cursor, client cursor
  // and sorter are dropped and the op asserts (13338 for capped, 15890 when the plan requires
  // it); otherwise it continues with whatever was gathered so far.
  466. virtual void recoverFromYield() {
  467. _nYields++;
  468. if ( _findingStartCursor.get() ) {
  469. _findingStartCursor->recoverFromYield();
  470. }
  471. else if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) {
  472. _yieldRecoveryFailed = true;
  473. _c.reset();
  474. _cc.reset();
  475. _so.reset();
  476. if ( _capped ) {
  477. msgassertedNoTrace( 13338, str::stream() << "capped cursor overrun during query: " << _pq.ns() );
  478. }
  479. else if ( qp().mustAssertOnYieldFailure() ) {
  480. msgassertedNoTrace( 15890, str::stream() << "UserQueryOp::recoverFromYield() failed to recover: " << _pq.ns() );
  481. }
  482. else {
  483. // we don't fail query since we're fine with returning partial data if collection dropped
  484. // todo: this is wrong. the cursor could be gone if closeAllDatabases command just ran
  485. }
  486. }
  487. }
  488. virtual long long nscanned() {
  489. if ( _findingStartCursor.get() ) {
  490. return 0; // should only be one query plan, so value doesn't really matter.
  491. }
  492. return _c.get() ? _c->nscanned() : _nscanned;
  493. }
  // Performs one step of the plan: either one step of the FindingStartCursor, or examining the
  // document at the cursor (match, chunk ownership, dup check, skip, output) and advancing.
  494. virtual void next() {
  495. if ( _findingStartCursor.get() ) {
  496. if ( !_findingStartCursor->done() ) {
  497. _findingStartCursor->next();
  498. }
  // once the start position is found, take over its cursor and drop the helper
  499. if ( _findingStartCursor->done() ) {
  500. _c = _findingStartCursor->cursor();
  501. _findingStartCursor.reset( 0 );
  502. }
  503. _capped = true;
  504. return;
  505. }
  506. if ( !_c || !_c->ok() ) {
  507. finish( false );
  508. return;
  509. }
  510. bool mayCreateCursor1 = _pq.wantMore() && ! _inMemSort && _pq.getNumToReturn() != 1 && useCursors;
  511. if( 0 ) {
  512. cout << "SCANNING this: " << this << " key: " << _c->currKey() << " obj: " << _c->current() << endl;
  513. }
  // stop once the scan budget from getMaxScan() has been used up (0 disables the check)
  514. if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ) {
  515. finish( true ); //?
  516. return;
  517. }
  518. _nscanned = _c->nscanned();
  519. if ( !matcher( _c )->matchesCurrent(_c.get() , &_details ) ) {
  520. // not a match, continue onward
  521. if ( _details._loadedObject )
  522. _nscannedObjects++;
  523. }
  524. else {
  525. _nscannedObjects++;
  526. DiskLoc cl = _c->currLoc();
  527. if ( _chunkManager && ! _chunkManager->belongsToMe( cl.obj() ) ) { // TODO: should make this covered at some point
  528. _nChunkSkips++;
  529. // log() << "TEMP skipping un-owned chunk: " << _c->current() << endl;
  530. }
  531. else if( _c->getsetdup(cl) ) {
  532. // dup
  533. }
  534. else {
  535. // got a match.
  536. if ( _inMemSort ) {
  537. // note: no cursors for non-indexed, ordered results. results must be fairly small.
  538. _so->add( _pq.returnKey() ? _c->currKey() : _c->current(), _pq.showDiskLoc() ? &cl : 0 );
  539. }
  540. else if ( _ntoskip > 0 ) {
  541. _ntoskip--;
  542. }
  543. else {
  // explain runs only count matches; nothing is written to _buf
  544. if ( _pq.isExplain() ) {
  545. _n++;
  546. if ( n() >= _pq.getNumToReturn() && !_pq.wantMore() ) {
  547. // .limit() was used, show just that much.
  548. finish( true ); //?
  549. return;
  550. }
  551. }
  552. else {
  // three output forms: index key only ($returnKey), key-only projection, or full document
  553. if ( _pq.returnKey() ) {
  554. BSONObjBuilder bb( _buf );
  555. bb.appendKeys( _c->indexKeyPattern() , _c->currKey() );
  556. bb.done();
  557. }
  558. else if ( _keyFieldsOnly ) {
  559. fillQueryResultFromObj( _buf , 0 , _keyFieldsOnly->hydrate( _c->currKey() ) );
  560. }
  561. else {
  562. BSONObj js = _c->current();
  563. assert( js.isValid() );
  // for oplog replay, remember the "ts" of the last document returned
  564. if ( _oplogReplay ) {
  565. BSONElement e = js["ts"];
  566. if ( e.type() == Date || e.type() == Timestamp )
  567. _slaveReadTill = e._opTime();
  568. }
  569. fillQueryResultFromObj( _buf , _pq.getFields() , js , (_pq.showDiskLoc() ? &cl : 0));
  570. }
  571. _n++;
  572. if ( ! _c->supportGetMore() ) {
  573. if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ) {
  574. finish( true );
  575. return;
  576. }
  577. }
  578. else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ) {
  579. /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */
  580. if ( mayCreateCursor1 ) {
  581. _wouldSaveClientCursor = true;
  582. if ( _c->advance() ) {
  583. // more...so save a cursor
  584. _saveClientCursor = true;
  585. }
  586. }
  587. finish( true );
  588. return;
  589. }
  590. }
  591. }
  592. }
  593. }
  594. _c->advance();
  595. }
  596. // this plan won, so set data for response broadly
  // stop == true ends the op via setStop(); stop == false marks it complete via setComplete().
  597. void finish( bool stop ) {
  598. massert( 13638, "client cursor dropped during explain query yield", !_pq.isExplain() || _c.get() );
  599. if ( _pq.isExplain() ) {
  600. _n = _inMemSort ? _so->size() : _n;
  601. }
  602. else if ( _inMemSort ) {
  // _so may have been reset by a failed yield recovery
  603. if( _so.get() )
  604. _so->fill( _buf, _pq.getFields() , _n );
  605. }
  606. if ( _c.get() ) {
  607. _nscanned = _c->nscanned();
  608. if ( _pq.hasOption( QueryOption_CursorTailable ) && _pq.getNumToReturn() != 1 )
  609. _c->setTailable();
  610. // If the tailing request succeeded.
  611. if ( _c->tailable() )
  612. _saveClientCursor = true;
  613. }
  614. if ( _pq.isExplain() ) {
  615. _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(),
  616. _curop.elapsedMillis(), useHints && !_pq.getHint().eoo(), _nYields ,
  617. _nChunkSkips, _keyFieldsOnly.get() > 0 );
  618. }
  619. else {
  // hand the accumulated documents to the response and release the buffer from _buf
  620. if ( _buf.len() ) {
  621. _response.appendData( _buf.buf(), _buf.len() );
  622. _buf.decouple();
  623. }
  624. }
  625. if ( stop ) {
  626. setStop();
  627. }
  628. else {
  629. setComplete();
  630. }
  631. }
  // Writes the single explain document into the response; afterwards n() reports 1.
  632. void finishExplain( const BSONObj &suffix ) {
  633. BSONObj obj = _eb.finishWithSuffix( totalNscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix);
  634. fillQueryResultFromObj(_buf, 0, obj);
  635. _n = 1;
  636. _oldN = 0;
  637. _response.appendData( _buf.buf(), _buf.len() );
  638. _buf.decouple();
  639. }
  640. virtual bool mayRecordPlan() const {
  641. return !_yieldRecoveryFailed && ( _pq.getNumToReturn() != 1 ) && ( ( _n > _pq.getNumToReturn() / 2 ) || ( complete() && !stopRequested() ) );
  642. }
  // Creates the op for the next plan, seeded with this op's running totals and remaining skip.
  643. virtual QueryOp *_createChild() const {
  644. if ( _pq.isExplain() ) {
  645. _eb.ensureStartScan();
  646. }
  647. UserQueryOp *ret = new UserQueryOp( _pq, _response, _eb, _curop );
  648. ret->_oldN = n();
  649. ret->_oldNscanned = totalNscanned();
  650. ret->_oldNscannedObjects = nscannedObjects();
  651. ret->_ntoskip = _ntoskip;
  652. return ret;
  653. }
  654. bool scanAndOrderRequired() const { return _inMemSort; }
  655. shared_ptr<Cursor> cursor() { return _c; }
  // the accessors below include the totals carried over from the parent op
  656. int n() const { return _oldN + _n; }
  657. long long totalNscanned() const { return _nscanned + _oldNscanned; }
  658. long long nscannedObjects() const { return _nscannedObjects + _oldNscannedObjects; }
  659. bool saveClientCursor() const { return _saveClientCursor; }
  660. bool wouldSaveClientCursor() const { return _wouldSaveClientCursor; }
  // Passes the last "ts" seen during an oplog-replay query on to the saved client cursor.
  661. void finishForOplogReplay( ClientCursor * cc ) {
  662. if ( _oplogReplay && ! _slaveReadTill.isNull() )
  663. cc->slaveReadTill( _slaveReadTill );
  664. }
  665. ShardChunkManagerPtr getChunkManager(){ return _chunkManager; }
  666. private:
  667. BufBuilder _buf;
  668. const ParsedQuery& _pq;
  669. scoped_ptr<Projection::KeyOnly> _keyFieldsOnly;
  670. long long _ntoskip;
  671. long long _nscanned;
  672. long long _oldNscanned;
  673. long long _nscannedObjects;
  674. long long _oldNscannedObjects;
  675. int _n; // found so far
  676. int _oldN;
  677. int _nYields;
  678. int _nChunkSkips;
  679. MatchDetails _details;
  680. ShardChunkManagerPtr _chunkManager;
  681. bool _inMemSort;
  682. auto_ptr< ScanAndOrder > _so;
  683. shared_ptr<Cursor> _c;
  684. ClientCursor::CleanupPointer _cc;
  685. ClientCursor::YieldData _yieldData;
  686. bool _capped;
  687. bool _saveClientCursor;
  688. bool _wouldSaveClientCursor;
  689. bool _oplogReplay;
  690. auto_ptr< FindingStartCursor > _findingStartCursor;
  691. Message &_response;
  692. ExplainBuilder &_eb;
  693. CurOp &_curop;
  694. OpTime _slaveReadTill;
  695. bool _yieldRecoveryFailed;
  696. };
  697. /* run a query -- includes checking for and running a Command \
  698. @return points to ns if exhaust mode. 0=normal mode
  699. */
  // Entry point for a query message. Handles, in order: commands, the simple-_id fast path,
  // and regular queries run through MultiPlanScanner / UserQueryOp.
  //   m      - the original request; stored on the client cursor when one is saved
  //   q      - the parsed query message (ns, query object, options, ntoskip)
  //   curop  - current operation; its debug() fields are filled in along the way
  //   result - receives the reply
  // Returns ns when a cursor was saved and QueryOption_Exhaust is set, otherwise 0.
  700. const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
  701. shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
  702. ParsedQuery& pq( *pq_shared );
  703. int ntoskip = q.ntoskip;
  704. BSONObj jsobj = q.query;
  705. int queryOptions = q.queryOptions;
  706. const char *ns = q.ns;
  707. if( logLevel >= 2 )
  708. log() << "runQuery called " << ns << " " << jsobj << endl;
  709. curop.debug().ns = ns;
  710. curop.debug().ntoreturn = pq.getNumToReturn();
  711. curop.debug().query = jsobj;
  712. curop.setQuery(jsobj);
  // --- command path: the reply is a single document built by runCommands() ---
  713. if ( pq.couldBeCommand() ) {
  714. BufBuilder bb;
  715. bb.skip(sizeof(QueryResult));
  716. BSONObjBuilder cmdResBuf;
  717. if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
  718. curop.debug().iscommand = true;
  719. curop.debug().query = jsobj;
  720. curop.markCommand();
  721. auto_ptr< QueryResult > qr;
  722. qr.reset( (QueryResult *) bb.buf() );
  723. bb.decouple();
  724. qr->setResultFlagsToOk();
  725. qr->len = bb.len();
  726. curop.debug().responseLength = bb.len();
  727. qr->setOperation(opReply);
  728. qr->cursorId = 0;
  729. qr->startingFrom = 0;
  730. qr->nReturned = 1;
  731. result.setData( qr.release(), true );
  732. }
  733. else {
  734. uasserted(13530, "bad or malformed command request?");
  735. }
  736. return 0;
  737. }
  738. /* --- regular query --- */
  739. int n = 0;
  740. BSONElement hint = useHints ? pq.getHint() : BSONElement();
  741. bool explain = pq.isExplain();
  742. bool snapshot = pq.isSnapshot();
  743. BSONObj order = pq.getOrder();
  744. BSONObj query = pq.getFilter();
  745. /* The ElemIter will not be happy if this isn't really an object. So throw exception
  746. here when that is true.
  747. (Which may indicate bad data from client.)
  748. */
  749. if ( query.objsize() == 0 ) {
  750. out() << "Bad query object?\n jsobj:";
  751. out() << jsobj.toString() << "\n query:";
  752. out() << query.toString() << endl;
  753. uassert( 10110 , "bad query object", false);
  754. }
  755. /* --- read lock --- */
  756. mongolock lk(false);
  757. Client::Context ctx( ns , dbpath , &lk );
  758. replVerifyReadsOk(pq);
  // tailable cursors require a capped collection and {$natural:1} order (the default if none given)
  759. if ( pq.hasOption( QueryOption_CursorTailable ) ) {
  760. NamespaceDetails *d = nsdetails( ns );
  761. uassert( 13051, "tailable cursor requested on non capped collection", d && d->capped );
  762. const BSONObj nat1 = BSON( "$natural" << 1 );
  763. if ( order.isEmpty() ) {
  764. order = nat1;
  765. }
  766. else {
  767. uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
  768. }
  769. }
  // $snapshot: replace the hint with the _id index when the collection has one
  770. BSONObj snapshotHint; // put here to keep the data in scope
  771. if( snapshot ) {
  772. NamespaceDetails *d = nsdetails(ns);
  773. if ( d ) {
  774. int i = d->findIdIndex();
  775. if( i < 0 ) {
  776. if ( strstr( ns , ".system." ) == 0 )
  777. log() << "warning: no _id index on $snapshot query, ns:" << ns << endl;
  778. }
  779. else {
  780. /* [dm] the name of an _id index tends to vary, so we build the hint the hard way here.
  781. probably need a better way to specify "use the _id index" as a hint. if someone is
  782. in the query optimizer please fix this then!
  783. */
  784. BSONObjBuilder b;
  785. b.append("$hint", d->idx(i).indexName());
  786. snapshotHint = b.obj();
  787. hint = snapshotHint.firstElement();
  788. }
  789. }
  790. }
  // --- fast path for simple _id queries: bypasses the query optimizer; the reply is built
  // here only when the namespace was not found or the _id index was found, otherwise control
  // falls through to the regular path ---
  791. if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
  792. bool nsFound = false;
  793. bool indexFound = false;
  794. BSONObj resObject;
  795. Client& c = cc();
  796. bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound );
  797. if ( nsFound == false || indexFound == true ) {
  798. if ( shardingState.needShardChunkManager( ns ) ) {
  799. ShardChunkManagerPtr m = shardingState.getShardChunkManager( ns );
  800. if ( m && ! m->belongsToMe( resObject ) ) {
  801. // I have something this _id
  802. // but it doesn't belong to me
  803. // so return nothing
  804. resObject = BSONObj();
  805. found = false;
  806. }
  807. }
  808. BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32);
  809. bb.skip(sizeof(QueryResult));
  810. curop.debug().idhack = true;
  811. if ( found ) {
  812. n = 1;
  813. fillQueryResultFromObj( bb , pq.getFields() , resObject );
  814. }
  815. auto_ptr< QueryResult > qr;
  816. qr.reset( (QueryResult *) bb.buf() );
  817. bb.decouple();
  818. qr->setResultFlagsToOk();
  819. qr->len = bb.len();
  820. curop.debug().responseLength = bb.len();
  821. qr->setOperation(opReply);
  822. qr->cursorId = 0;
  823. qr->startingFrom = 0;
  824. qr->nReturned = n;
  825. result.setData( qr.release(), true );
  826. return NULL;
  827. }
  828. }
  829. // regular, not QO bypass query
  // for explain without an index specifier, a throwaway scanner is used to fetch the explain
  // of any prerecorded plan, reported later as "oldPlan"
  830. BSONObj oldPlan;
  831. if ( explain && ! pq.hasIndexSpecifier() ) {
  832. MultiPlanScanner mps( ns, query, order );
  833. if ( mps.usingPrerecordedPlan() )
  834. oldPlan = mps.oldExplain();
  835. }
  836. auto_ptr< MultiPlanScanner > mps( new MultiPlanScanner( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax(), false, true ) );
  837. BSONObj explainSuffix;
  838. if ( explain ) {
  839. BSONObjBuilder bb;
  840. if ( !oldPlan.isEmpty() )
  841. bb.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() );
  842. explainSuffix = bb.obj();
  843. }
  844. ExplainBuilder eb;
  845. UserQueryOp original( pq, result, eb, curop );
  846. shared_ptr< UserQueryOp > o = mps->runOp( original );
  847. UserQueryOp &dqo = *o;
  // an op that did not complete carries the exception that stopped it; rethrow it
  848. if ( ! dqo.complete() )
  849. throw MsgAssertionException( dqo.exception() );
  850. if ( explain ) {
  851. dqo.finishExplain( explainSuffix );
  852. }
  853. n = dqo.n();
  854. long long nscanned = dqo.totalNscanned();
  855. curop.debug().scanAndOrder = dqo.scanAndOrderRequired();
  856. shared_ptr<Cursor> cursor = dqo.cursor();
  857. if( logLevel >= 5 )
  858. log() << " used cursor: " << cursor.get() << endl;
  859. long long cursorid = 0;
  860. const char * exhaust = 0;
  // save a client cursor when the op asked for one, or when it would have and the scanner
  // still has more clauses to run
  861. if ( dqo.saveClientCursor() || ( dqo.wouldSaveClientCursor() && mps->mayRunMore() ) ) {
  862. ClientCursor *cc;
  863. bool moreClauses = mps->mayRunMore();
  864. if ( moreClauses ) {
  865. // this MultiCursor will use a dumb NoOp to advance(), so no need to specify mayYield
  // note: the auto_ptr mps is passed by value here; mps is not used again after this point
  866. shared_ptr< Cursor > multi( new MultiCursor( mps, cursor, dqo.matcher( cursor ), dqo ) );
  867. cc = new ClientCursor(queryOptions, multi, ns, jsobj.getOwned());
  868. }
  869. else {
  870. if( ! cursor->matcher() ) cursor->setMatcher( dqo.matcher( cursor ) );
  871. cc = new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned() );
  872. }
  873. cc->setChunkManager( dqo.getChunkManager() );
  874. cursorid = cc->cursorid();
  875. DEV tlog(2) << "query has more, cursorid: " << cursorid << endl;
  876. cc->setPos( n );
  877. cc->pq = pq_shared;
  878. cc->fields = pq.getFieldPtr();
  879. cc->originalMessage = m;
  880. cc->updateLocation();
  881. if ( !cc->ok() && cc->c()->tailable() )
  882. DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
  883. if( queryOptions & QueryOption_Exhaust ) {
  884. exhaust = ns;
  885. curop.debug().exhaust = true;
  886. }
  887. dqo.finishForOplogReplay(cc);
  888. }
  // fill in the reply header; the documents were appended to result by the op
  889. QueryResult *qr = (QueryResult *) result.header();
  890. qr->cursorId = cursorid;
  891. qr->setResultFlagsToOk();
  892. // qr->len is updated automatically by appendData()
  893. curop.debug().responseLength = qr->len;
  894. qr->setOperation(opReply);
  895. qr->startingFrom = 0;
  896. qr->nReturned = n;
  897. int duration = curop.elapsedMillis();
  898. bool dbprofile = curop.shouldDBProfile( duration );
  // nscanned / ntoskip are recorded only for profiled or slow operations
  899. if ( dbprofile || duration >= cmdLine.slowMS ) {
  900. curop.debug().nscanned = (int) nscanned;
  901. curop.debug().ntoskip = ntoskip;
  902. }
  903. curop.debug().nreturned = n;
  904. return exhaust;
  905. }
  906. } // namespace mongo