PageRenderTime 72ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 1ms

/src/mongo/db/ops/query.cpp

https://github.com/iandaniel/mongo
C++ | 1060 lines | 821 code | 154 blank | 85 comment | 140 complexity | 1438fecb55a24e0a87361ae8cde6d128 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. // query.cpp
  2. /**
  3. * Copyright (C) 2008 10gen Inc.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License, version 3,
  7. * as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU Affero General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Affero General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "pch.h"
  18. #include "mongo/db/ops/query.h"
  19. #include "mongo/bson/util/builder.h"
  20. #include "mongo/db/clientcursor.h"
  21. #include "mongo/db/commands.h"
  22. #include "mongo/db/oplog.h"
  23. #include "mongo/db/pagefault.h"
  24. #include "mongo/db/pdfile.h"
  25. #include "mongo/db/queryoptimizer.h"
  26. #include "mongo/db/queryoptimizercursor.h"
  27. #include "mongo/db/replutil.h"
  28. #include "mongo/db/scanandorder.h"
  29. #include "mongo/s/d_logic.h"
  30. #include "mongo/s/stale_exception.h" // for SendStaleConfigException
  31. #include "mongo/server.h"
  32. namespace mongo {
  33. /* We cut off further objects once we cross this threshold; thus, you might get
  34. a little bit more than this, it is a threshold rather than a limit.
  The value is expressed in bytes (4 * 1024 * 1024) and is compared against the
  length of the reply buffer being built.
  35. */
  36. const int MaxBytesToReturnToClientAtOnce = 4 * 1024 * 1024;
  37. bool runCommands(const char *ns, BSONObj& jsobj, CurOp& curop, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) {
  38. try {
  39. return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions);
  40. }
  41. catch( SendStaleConfigException& ){
  42. throw;
  43. }
  44. catch ( AssertionException& e ) {
  45. verify( e.getCode() != SendStaleConfigCode && e.getCode() != RecvStaleConfigCode );
  46. e.getInfo().append( anObjBuilder , "assertion" , "assertionCode" );
  47. curop.debug().exceptionInfo = e.getInfo();
  48. }
  49. anObjBuilder.append("errmsg", "db assertion failure");
  50. anObjBuilder.append("ok", 0.0);
  51. BSONObj x = anObjBuilder.done();
  52. b.appendBuf((void*) x.objdata(), x.objsize());
  53. return true;
  54. }
  // Shared constant objects parsed once from JSON: {"_id": 1} and the empty object {}.
  55. BSONObj id_obj = fromjson("{\"_id\":1}");
  56. BSONObj empty_obj = fromjson("{}");
  57. //int dump = 0;
  58. /* empty result for error conditions */
  59. QueryResult* emptyMoreResult(long long cursorid) {
  60. BufBuilder b(32768);
  61. b.skip(sizeof(QueryResult));
  62. QueryResult *qr = (QueryResult *) b.buf();
  63. qr->cursorId = 0; // 0 indicates no more data to retrieve.
  64. qr->startingFrom = 0;
  65. qr->len = b.len();
  66. qr->setOperation(opReply);
  67. qr->initializeResultFlags();
  68. qr->nReturned = 0;
  69. b.decouple();
  70. return qr;
  71. }
  /**
   * Produces the next batch of results for the cursor identified by 'cursorid'.
   *
   * Takes a read context on 'ns', pins the ClientCursor and appends matching,
   * non-duplicate documents to the reply buffer until 'ntoreturn' documents were added
   * (when ntoreturn is nonzero), the buffer length exceeds MaxBytesToReturnToClientAtOnce,
   * or the underlying cursor is no longer ok().
   *
   * If no cursor is found for 'cursorid', the reply carries ResultFlag_CursorNotFound and
   * a cursor id of 0. 'exhaust' is reset to false on entry and, when the cursor survives,
   * set from its QueryOption_Exhaust option. '*isCursorAuthorized' is set to true once the
   * ns check (uassert 14833) has passed.
   *
   * @return the reply (buffer decoupled from its builder), or 0 when a tailable cursor with
   *         QueryOption_AwaitData has produced nothing yet and pass < 1000.
   */
  72. QueryResult* processGetMore(const char* ns,
  73. int ntoreturn,
  74. long long cursorid,
  75. CurOp& curop,
  76. int pass,
  77. bool& exhaust,
  78. bool* isCursorAuthorized ) {
  79. exhaust = false;
  // Room for the header, the soft byte threshold and some slack for overshoot.
  80. int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;
  81. BufBuilder b( bufSize );
  82. b.skip(sizeof(QueryResult));
  83. int resultFlags = ResultFlag_AwaitCapable;
  84. int start = 0;
  85. int n = 0;
  86. Client::ReadContext ctx(ns);
  87. // call this readlocked so state can't change
  88. replVerifyReadsOk();
  89. ClientCursor::Pin p(cursorid);
  90. ClientCursor *cc = p.c();
  91. if ( unlikely(!cc) ) {
  92. LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl;
  93. cursorid = 0;
  94. resultFlags = ResultFlag_CursorNotFound;
  95. }
  96. else {
  97. // check for spoofing of the ns such that it does not match the one originally there for the cursor
  98. uassert(14833, "auth error", str::equals(ns, cc->ns().c_str()));
  99. *isCursorAuthorized = true;
  100. if ( pass == 0 )
  101. cc->updateSlaveLocation( curop );
  102. int queryOptions = cc->queryOptions();
  103. curop.debug().query = cc->query();
  104. curop.setQuery( cc->query() );
  105. start = cc->pos();
  106. Cursor *c = cc->c();
  107. c->recoverFromYield();
  108. DiskLoc last;
  109. // This manager may be stale, but it's the state of chunking when the cursor was created.
  110. ShardChunkManagerPtr manager = cc->getChunkManager();
  111. while ( 1 ) {
  112. if ( !c->ok() ) {
  113. if ( c->tailable() ) {
  114. /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however
  115. advance() can still be retried as a reactivation attempt. when there is new data, it will
  116. return true. that's what we are doing here.
  117. */
  118. if ( c->advance() )
  119. continue;
  // Nothing produced yet on an await-data cursor: return no reply at all.
  120. if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
  121. return 0;
  122. }
  123. break;
  124. }
  // Non-tailable cursor is finished: unpin and erase it, and report cursor id 0.
  125. p.release();
  126. bool ok = ClientCursor::erase(cursorid);
  127. verify(ok);
  128. cursorid = 0;
  129. cc = 0;
  130. break;
  131. }
  132. MatchDetails details;
  133. if ( cc->fields && cc->fields->getArrayOpType() == Projection::ARRAY_OP_POSITIONAL ) {
  134. // field projection specified, and contains an array operator
  135. details.requestElemMatchKey();
  136. }
  137. // in some cases (clone collection) there won't be a matcher
  138. if ( !c->currentMatches( &details ) ) {
  // Current document does not match: nothing is added to the reply.
  139. }
  140. else if ( manager && ! manager->belongsToMe( cc ) ){
  141. LOG(2) << "cursor skipping document in un-owned chunk: " << c->current() << endl;
  142. }
  143. else {
  144. if( c->getsetdup(c->currLoc()) ) {
  145. //out() << " but it's a dup \n";
  146. }
  147. else {
  148. last = c->currLoc();
  149. n++;
  150. cc->fillQueryResultFromObj( b, &details );
  // Batch is full: step past the document just returned and record the new position.
  151. if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) {
  152. c->advance();
  153. cc->incPos( n );
  154. break;
  155. }
  156. }
  157. }
  158. c->advance();
  // When yieldSometimes() reports failure the cursor is erased and the reply gets cursor id 0.
  159. if ( ! cc->yieldSometimes( ( c->ok() && c->keyFieldsOnly() ) ?
  160. ClientCursor::DontNeed : ClientCursor::WillNeed ) ) {
  161. ClientCursor::erase(cursorid);
  162. cursorid = 0;
  163. cc = 0;
  164. break;
  165. }
  166. }
  // The cursor is still alive: save its state for the next getMore.
  167. if ( cc ) {
  168. if ( c->supportYields() ) {
  169. ClientCursor::YieldData data;
  170. verify( cc->prepareToYield( data ) );
  171. }
  172. else {
  173. cc->c()->noteLocation();
  174. }
  175. cc->mayUpgradeStorage();
  176. cc->storeOpForSlave( last );
  177. exhaust = cc->queryOptions() & QueryOption_Exhaust;
  178. }
  179. }
  // Fill in the reply header at the front of the buffer.
  180. QueryResult *qr = (QueryResult *) b.buf();
  181. qr->len = b.len();
  182. qr->setOperation(opReply);
  183. qr->_resultFlags() = resultFlags;
  184. qr->cursorId = cursorid;
  185. qr->startingFrom = start;
  186. qr->nReturned = n;
  187. b.decouple();
  188. return qr;
  189. }
  190. ResultDetails::ResultDetails() :
  191. match(),
  192. orderedMatch(),
  193. loadedRecord(),
  194. chunkSkip() {
  195. }
  196. ExplainRecordingStrategy::ExplainRecordingStrategy
  197. ( const ExplainQueryInfo::AncillaryInfo &ancillaryInfo ) :
  198. _ancillaryInfo( ancillaryInfo ) {
  199. }
  200. shared_ptr<ExplainQueryInfo> ExplainRecordingStrategy::doneQueryInfo() {
  201. shared_ptr<ExplainQueryInfo> ret = _doneQueryInfo();
  202. ret->setAncillaryInfo( _ancillaryInfo );
  203. return ret;
  204. }
  205. NoExplainStrategy::NoExplainStrategy() :
  206. ExplainRecordingStrategy( ExplainQueryInfo::AncillaryInfo() ) {
  207. }
  208. shared_ptr<ExplainQueryInfo> NoExplainStrategy::_doneQueryInfo() {
  209. verify( false );
  210. return shared_ptr<ExplainQueryInfo>();
  211. }
  212. MatchCountingExplainStrategy::MatchCountingExplainStrategy
  213. ( const ExplainQueryInfo::AncillaryInfo &ancillaryInfo ) :
  214. ExplainRecordingStrategy( ancillaryInfo ),
  215. _orderedMatches() {
  216. }
  217. void MatchCountingExplainStrategy::noteIterate( const ResultDetails& resultDetails ) {
  218. _noteIterate( resultDetails );
  219. if ( resultDetails.orderedMatch ) {
  220. ++_orderedMatches;
  221. }
  222. }
  223. SimpleCursorExplainStrategy::SimpleCursorExplainStrategy
  224. ( const ExplainQueryInfo::AncillaryInfo &ancillaryInfo,
  225. const shared_ptr<Cursor> &cursor ) :
  226. MatchCountingExplainStrategy( ancillaryInfo ),
  227. _cursor( cursor ),
  228. _explainInfo( new ExplainSinglePlanQueryInfo() ) {
  229. }
  230. void SimpleCursorExplainStrategy::notePlan( bool scanAndOrder, bool indexOnly ) {
  231. _explainInfo->notePlan( *_cursor, scanAndOrder, indexOnly );
  232. }
  233. void SimpleCursorExplainStrategy::_noteIterate( const ResultDetails& resultDetails ) {
  234. _explainInfo->noteIterate( resultDetails.match,
  235. resultDetails.loadedRecord,
  236. resultDetails.chunkSkip,
  237. *_cursor );
  238. }
  239. void SimpleCursorExplainStrategy::noteYield() {
  240. _explainInfo->noteYield();
  241. }
  242. shared_ptr<ExplainQueryInfo> SimpleCursorExplainStrategy::_doneQueryInfo() {
  243. _explainInfo->noteDone( *_cursor );
  244. return _explainInfo->queryInfo();
  245. }
  246. QueryOptimizerCursorExplainStrategy::QueryOptimizerCursorExplainStrategy
  247. ( const ExplainQueryInfo::AncillaryInfo &ancillaryInfo,
  248. const shared_ptr<QueryOptimizerCursor> &cursor ) :
  249. MatchCountingExplainStrategy( ancillaryInfo ),
  250. _cursor( cursor ) {
  251. }
  252. void QueryOptimizerCursorExplainStrategy::_noteIterate( const ResultDetails& resultDetails ) {
  253. // Note ordered matches only; if an unordered plan is selected, the explain result will
  254. // be updated with reviseN().
  255. _cursor->noteIterate( resultDetails.orderedMatch,
  256. resultDetails.loadedRecord,
  257. resultDetails.chunkSkip );
  258. }
  259. void QueryOptimizerCursorExplainStrategy::noteYield() {
  260. _cursor->noteYield();
  261. }
  262. shared_ptr<ExplainQueryInfo> QueryOptimizerCursorExplainStrategy::_doneQueryInfo() {
  263. return _cursor->explainQueryInfo();
  264. }
  265. ResponseBuildStrategy::ResponseBuildStrategy( const ParsedQuery &parsedQuery,
  266. const shared_ptr<Cursor> &cursor,
  267. BufBuilder &buf ) :
  268. _parsedQuery( parsedQuery ),
  269. _cursor( cursor ),
  270. _queryOptimizerCursor( dynamic_pointer_cast<QueryOptimizerCursor>( _cursor ) ),
  271. _buf( buf ) {
  272. }
  273. void ResponseBuildStrategy::resetBuf() {
  274. _buf.reset();
  275. _buf.skip( sizeof( QueryResult ) );
  276. }
  277. BSONObj ResponseBuildStrategy::current( bool allowCovered,
  278. ResultDetails* resultDetails ) const {
  279. if ( _parsedQuery.returnKey() ) {
  280. BSONObjBuilder bob;
  281. bob.appendKeys( _cursor->indexKeyPattern(), _cursor->currKey() );
  282. return bob.obj();
  283. }
  284. if ( allowCovered ) {
  285. const Projection::KeyOnly *keyFieldsOnly = _cursor->keyFieldsOnly();
  286. if ( keyFieldsOnly ) {
  287. return keyFieldsOnly->hydrate( _cursor->currKey() );
  288. }
  289. }
  290. resultDetails->loadedRecord = true;
  291. BSONObj ret = _cursor->current();
  292. verify( ret.isValid() );
  293. return ret;
  294. }
  295. OrderedBuildStrategy::OrderedBuildStrategy( const ParsedQuery &parsedQuery,
  296. const shared_ptr<Cursor> &cursor,
  297. BufBuilder &buf ) :
  298. ResponseBuildStrategy( parsedQuery, cursor, buf ),
  299. _skip( _parsedQuery.getSkip() ),
  300. _bufferedMatches() {
  301. }
  302. bool OrderedBuildStrategy::handleMatch( ResultDetails* resultDetails ) {
  303. DiskLoc loc = _cursor->currLoc();
  304. if ( _cursor->getsetdup( loc ) ) {
  305. return false;
  306. }
  307. if ( _skip > 0 ) {
  308. --_skip;
  309. return false;
  310. }
  311. BSONObj currentDocument = current( true, resultDetails );
  312. // Explain does not obey soft limits, so matches should not be buffered.
  313. if ( !_parsedQuery.isExplain() ) {
  314. fillQueryResultFromObj( _buf, _parsedQuery.getFields(),
  315. currentDocument, &resultDetails->matchDetails,
  316. ( _parsedQuery.showDiskLoc() ? &loc : 0 ) );
  317. ++_bufferedMatches;
  318. }
  319. resultDetails->match = true;
  320. resultDetails->orderedMatch = true;
  321. return true;
  322. }
  323. ReorderBuildStrategy* ReorderBuildStrategy::make( const ParsedQuery& parsedQuery,
  324. const shared_ptr<Cursor>& cursor,
  325. BufBuilder& buf,
  326. const QueryPlanSummary& queryPlan ) {
  327. auto_ptr<ReorderBuildStrategy> ret( new ReorderBuildStrategy( parsedQuery, cursor, buf ) );
  328. ret->init( queryPlan );
  329. return ret.release();
  330. }
  331. ReorderBuildStrategy::ReorderBuildStrategy( const ParsedQuery &parsedQuery,
  332. const shared_ptr<Cursor> &cursor,
  333. BufBuilder &buf ) :
  334. ResponseBuildStrategy( parsedQuery, cursor, buf ),
  335. _bufferedMatches() {
  336. }
  337. void ReorderBuildStrategy::init( const QueryPlanSummary &queryPlan ) {
  338. _scanAndOrder.reset( newScanAndOrder( queryPlan ) );
  339. }
  340. bool ReorderBuildStrategy::handleMatch( ResultDetails* resultDetails ) {
  341. if ( _cursor->getsetdup( _cursor->currLoc() ) ) {
  342. return false;
  343. }
  344. _handleMatchNoDedup( resultDetails );
  345. resultDetails->match = true;
  346. return true;
  347. }
  348. void ReorderBuildStrategy::_handleMatchNoDedup( ResultDetails* resultDetails ) {
  349. DiskLoc loc = _cursor->currLoc();
  350. _scanAndOrder->add( current( false, resultDetails ),
  351. _parsedQuery.showDiskLoc() ? &loc : 0 );
  352. }
  353. int ReorderBuildStrategy::rewriteMatches() {
  354. cc().curop()->debug().scanAndOrder = true;
  355. int ret = 0;
  356. _scanAndOrder->fill( _buf, &_parsedQuery, ret );
  357. _bufferedMatches = ret;
  358. return ret;
  359. }
  360. ScanAndOrder *
  361. ReorderBuildStrategy::newScanAndOrder( const QueryPlanSummary &queryPlan ) const {
  362. verify( !_parsedQuery.getOrder().isEmpty() );
  363. verify( _cursor->ok() );
  364. const FieldRangeSet *fieldRangeSet = 0;
  365. if ( queryPlan.valid() ) {
  366. fieldRangeSet = queryPlan._fieldRangeSetMulti.get();
  367. }
  368. else {
  369. verify( _queryOptimizerCursor );
  370. fieldRangeSet = _queryOptimizerCursor->initialFieldRangeSet();
  371. }
  372. verify( fieldRangeSet );
  373. return new ScanAndOrder( _parsedQuery.getSkip(),
  374. _parsedQuery.getNumToReturn(),
  375. _parsedQuery.getOrder(),
  376. *fieldRangeSet );
  377. }
  378. HybridBuildStrategy* HybridBuildStrategy::make( const ParsedQuery& parsedQuery,
  379. const shared_ptr<QueryOptimizerCursor>& cursor,
  380. BufBuilder& buf ) {
  381. auto_ptr<HybridBuildStrategy> ret( new HybridBuildStrategy( parsedQuery, cursor, buf ) );
  382. ret->init();
  383. return ret.release();
  384. }
  385. HybridBuildStrategy::HybridBuildStrategy( const ParsedQuery &parsedQuery,
  386. const shared_ptr<QueryOptimizerCursor> &cursor,
  387. BufBuilder &buf ) :
  388. ResponseBuildStrategy( parsedQuery, cursor, buf ),
  389. _orderedBuild( _parsedQuery, _cursor, _buf ),
  390. _reorderedMatches() {
  391. }
  392. void HybridBuildStrategy::init() {
  393. _reorderBuild.reset( ReorderBuildStrategy::make( _parsedQuery, _cursor, _buf,
  394. QueryPlanSummary() ) );
  395. }
  396. bool HybridBuildStrategy::handleMatch( ResultDetails* resultDetails ) {
  397. if ( !_queryOptimizerCursor->currentPlanScanAndOrderRequired() ) {
  398. return _orderedBuild.handleMatch( resultDetails );
  399. }
  400. return handleReorderMatch( resultDetails );
  401. }
  402. bool HybridBuildStrategy::handleReorderMatch( ResultDetails* resultDetails ) {
  403. DiskLoc loc = _cursor->currLoc();
  404. if ( _scanAndOrderDups.getsetdup( loc ) ) {
  405. return false;
  406. }
  407. resultDetails->match = true;
  408. try {
  409. _reorderBuild->_handleMatchNoDedup( resultDetails );
  410. } catch ( const UserException &e ) {
  411. if ( e.getCode() == ScanAndOrderMemoryLimitExceededAssertionCode ) {
  412. if ( _queryOptimizerCursor->hasPossiblyExcludedPlans() ) {
  413. _queryOptimizerCursor->clearIndexesForPatterns();
  414. throw QueryRetryException();
  415. }
  416. else if ( _queryOptimizerCursor->runningInitialInOrderPlan() ) {
  417. _queryOptimizerCursor->abortOutOfOrderPlans();
  418. return true;
  419. }
  420. }
  421. throw;
  422. }
  423. return true;
  424. }
  425. int HybridBuildStrategy::rewriteMatches() {
  426. if ( !_queryOptimizerCursor->completePlanOfHybridSetScanAndOrderRequired() ) {
  427. return _orderedBuild.rewriteMatches();
  428. }
  429. _reorderedMatches = true;
  430. resetBuf();
  431. return _reorderBuild->rewriteMatches();
  432. }
  433. int HybridBuildStrategy::bufferedMatches() const {
  434. return _reorderedMatches ?
  435. _reorderBuild->bufferedMatches() :
  436. _orderedBuild.bufferedMatches();
  437. }
  438. void HybridBuildStrategy::finishedFirstBatch() {
  439. _queryOptimizerCursor->abortOutOfOrderPlans();
  440. }
  441. QueryResponseBuilder *QueryResponseBuilder::make( const ParsedQuery &parsedQuery,
  442. const shared_ptr<Cursor> &cursor,
  443. const QueryPlanSummary &queryPlan,
  444. const BSONObj &oldPlan ) {
  445. auto_ptr<QueryResponseBuilder> ret( new QueryResponseBuilder( parsedQuery, cursor ) );
  446. ret->init( queryPlan, oldPlan );
  447. return ret.release();
  448. }
  449. QueryResponseBuilder::QueryResponseBuilder( const ParsedQuery &parsedQuery,
  450. const shared_ptr<Cursor> &cursor ) :
  451. _parsedQuery( parsedQuery ),
  452. _cursor( cursor ),
  453. _queryOptimizerCursor( dynamic_pointer_cast<QueryOptimizerCursor>( _cursor ) ),
  454. _buf( 32768 ) { // TODO be smarter here
  455. }
  456. void QueryResponseBuilder::init( const QueryPlanSummary &queryPlan, const BSONObj &oldPlan ) {
  457. _chunkManager = newChunkManager();
  458. _explain = newExplainRecordingStrategy( queryPlan, oldPlan );
  459. _builder = newResponseBuildStrategy( queryPlan );
  460. _builder->resetBuf();
  461. }
  462. bool QueryResponseBuilder::addMatch() {
  463. ResultDetails resultDetails;
  464. if ( _parsedQuery.getFields() && _parsedQuery.getFields()->getArrayOpType() == Projection::ARRAY_OP_POSITIONAL ) {
  465. // field projection specified, and contains an array operator
  466. resultDetails.matchDetails.requestElemMatchKey();
  467. }
  468. bool match =
  469. currentMatches( &resultDetails ) &&
  470. chunkMatches( &resultDetails ) &&
  471. _builder->handleMatch( &resultDetails );
  472. _explain->noteIterate( resultDetails );
  473. return match;
  474. }
  475. void QueryResponseBuilder::noteYield() {
  476. _explain->noteYield();
  477. }
  478. bool QueryResponseBuilder::enoughForFirstBatch() const {
  479. return _parsedQuery.enoughForFirstBatch( _builder->bufferedMatches(), _buf.len() );
  480. }
  481. bool QueryResponseBuilder::enoughTotalResults() const {
  482. if ( _parsedQuery.isExplain() ) {
  483. return _parsedQuery.enoughForExplain( _explain->orderedMatches() );
  484. }
  485. return ( _parsedQuery.enough( _builder->bufferedMatches() ) ||
  486. _buf.len() >= MaxBytesToReturnToClientAtOnce );
  487. }
  488. void QueryResponseBuilder::finishedFirstBatch() {
  489. _builder->finishedFirstBatch();
  490. }
  491. int QueryResponseBuilder::handoff( Message &result ) {
  492. int rewriteCount = _builder->rewriteMatches();
  493. if ( _parsedQuery.isExplain() ) {
  494. shared_ptr<ExplainQueryInfo> explainInfo = _explain->doneQueryInfo();
  495. if ( rewriteCount != -1 ) {
  496. explainInfo->reviseN( rewriteCount );
  497. }
  498. _builder->resetBuf();
  499. fillQueryResultFromObj( _buf, 0, explainInfo->bson() );
  500. result.appendData( _buf.buf(), _buf.len() );
  501. _buf.decouple();
  502. return 1;
  503. }
  504. if ( _buf.len() > 0 ) {
  505. result.appendData( _buf.buf(), _buf.len() );
  506. _buf.decouple();
  507. }
  508. return _builder->bufferedMatches();
  509. }
  510. ShardChunkManagerPtr QueryResponseBuilder::newChunkManager() const {
  511. if ( !shardingState.needShardChunkManager( _parsedQuery.ns() ) ) {
  512. return ShardChunkManagerPtr();
  513. }
  514. return shardingState.getShardChunkManager( _parsedQuery.ns() );
  515. }
  516. shared_ptr<ExplainRecordingStrategy> QueryResponseBuilder::newExplainRecordingStrategy
  517. ( const QueryPlanSummary &queryPlan, const BSONObj &oldPlan ) const {
  518. if ( !_parsedQuery.isExplain() ) {
  519. return shared_ptr<ExplainRecordingStrategy>( new NoExplainStrategy() );
  520. }
  521. ExplainQueryInfo::AncillaryInfo ancillaryInfo;
  522. ancillaryInfo._oldPlan = oldPlan;
  523. if ( _queryOptimizerCursor ) {
  524. return shared_ptr<ExplainRecordingStrategy>
  525. ( new QueryOptimizerCursorExplainStrategy( ancillaryInfo, _queryOptimizerCursor ) );
  526. }
  527. shared_ptr<ExplainRecordingStrategy> ret
  528. ( new SimpleCursorExplainStrategy( ancillaryInfo, _cursor ) );
  529. ret->notePlan( queryPlan.valid() && queryPlan._scanAndOrderRequired,
  530. queryPlan._keyFieldsOnly );
  531. return ret;
  532. }
  533. shared_ptr<ResponseBuildStrategy> QueryResponseBuilder::newResponseBuildStrategy
  534. ( const QueryPlanSummary &queryPlan ) {
  535. bool unordered = _parsedQuery.getOrder().isEmpty();
  536. bool empty = !_cursor->ok();
  537. bool singlePlan = !_queryOptimizerCursor;
  538. bool singleOrderedPlan =
  539. singlePlan && ( !queryPlan.valid() || !queryPlan._scanAndOrderRequired );
  540. CandidatePlanCharacter queryOptimizerPlans;
  541. if ( _queryOptimizerCursor ) {
  542. queryOptimizerPlans = _queryOptimizerCursor->initialCandidatePlans();
  543. }
  544. if ( unordered ||
  545. empty ||
  546. singleOrderedPlan ||
  547. ( !singlePlan && !queryOptimizerPlans.mayRunOutOfOrderPlan() ) ) {
  548. return shared_ptr<ResponseBuildStrategy>
  549. ( new OrderedBuildStrategy( _parsedQuery, _cursor, _buf ) );
  550. }
  551. if ( singlePlan ||
  552. !queryOptimizerPlans.mayRunInOrderPlan() ) {
  553. return shared_ptr<ResponseBuildStrategy>
  554. ( ReorderBuildStrategy::make( _parsedQuery, _cursor, _buf, queryPlan ) );
  555. }
  556. return shared_ptr<ResponseBuildStrategy>
  557. ( HybridBuildStrategy::make( _parsedQuery, _queryOptimizerCursor, _buf ) );
  558. }
  559. bool QueryResponseBuilder::currentMatches( ResultDetails* resultDetails ) {
  560. bool matches = _cursor->currentMatches( &resultDetails->matchDetails );
  561. if ( resultDetails->matchDetails.hasLoadedRecord() ) {
  562. resultDetails->loadedRecord = true;
  563. }
  564. return matches;
  565. }
  566. bool QueryResponseBuilder::chunkMatches( ResultDetails* resultDetails ) {
  567. if ( !_chunkManager ) {
  568. return true;
  569. }
  570. // TODO: should make this covered at some point
  571. resultDetails->loadedRecord = true;
  572. if ( _chunkManager->belongsToMe( _cursor->current() ) ) {
  573. return true;
  574. }
  575. resultDetails->chunkSkip = true;
  576. return false;
  577. }
  578. /**
  579. * Run a query with a cursor provided by the query optimizer, or FindingStartCursor.
  * FindingStartCursor is used when the query has QueryOption_OplogReplay set.
  * Matches are collected through a QueryResponseBuilder and handed off into 'result';
  * a ClientCursor is kept for later getMore requests when more results may follow or
  * the cursor is tailable.
  * @return 'ns' when curop.debug().exhaust is set, otherwise an empty string.
  * @throws SendStaleConfigException if the sharding version of 'ns' is no longer write
  *         compatible with 'shardingVersionAtStart' once iteration has finished.
  580. * @yields the db lock.
  581. */
  582. string queryWithQueryOptimizer( int queryOptions, const string& ns,
  583. const BSONObj &jsobj, CurOp& curop,
  584. const BSONObj &query, const BSONObj &order,
  585. const shared_ptr<ParsedQuery> &pq_shared,
  586. const BSONObj &oldPlan,
  587. const ConfigVersion &shardingVersionAtStart,
  588. scoped_ptr<PageFaultRetryableSection>& parentPageFaultSection,
  589. scoped_ptr<NoPageFaultsAllowed>& noPageFault,
  590. Message &result ) {
  591. const ParsedQuery &pq( *pq_shared );
  592. shared_ptr<Cursor> cursor;
  593. QueryPlanSummary queryPlan;
  594. if ( pq.hasOption( QueryOption_OplogReplay ) ) {
  595. cursor = FindingStartCursor::getCursor( ns.c_str(), query, order );
  596. }
  597. else {
  598. cursor =
  599. NamespaceDetailsTransient::getCursor( ns.c_str(),
  600. query,
  601. order,
  602. QueryPlanSelectionPolicy::any(),
  603. pq_shared,
  604. false,
  605. &queryPlan );
  606. }
  607. verify( cursor );
  608. scoped_ptr<QueryResponseBuilder> queryResponseBuilder
  609. ( QueryResponseBuilder::make( pq, cursor, queryPlan, oldPlan ) );
  610. bool saveClientCursor = false;
  611. OpTime slaveReadTill;
  // Temporary ClientCursor used while iterating; it is reset after handoff() below.
  612. ClientCursor::Holder ccPointer( new ClientCursor( QueryOption_NoCursorTimeout, cursor,
  613. ns ) );
  614. for( ; cursor->ok(); cursor->advance() ) {
  615. bool yielded = false;
  616. if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered, &yielded ) ||
  617. !cursor->ok() ) {
  618. cursor.reset();
  619. queryResponseBuilder->noteYield();
  620. // !!! TODO The queryResponseBuilder still holds cursor. Currently it will not do
  621. // anything unsafe with the cursor in handoff(), but this is very fragile.
  622. //
  623. // We don't fail the query since we're fine with returning partial data if the
  624. // collection was dropped.
  625. // NOTE see SERVER-2454.
  626. // TODO This is wrong. The cursor could be gone if the closeAllDatabases command
  627. // just ran.
  628. break;
  629. }
  630. if ( yielded ) {
  631. queryResponseBuilder->noteYield();
  632. }
  // Stop once more documents have been scanned than the query's maxScan allows.
  633. if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) {
  634. break;
  635. }
  636. if ( !queryResponseBuilder->addMatch() ) {
  637. continue;
  638. }
  639. // Note slave's position in the oplog.
  640. if ( pq.hasOption( QueryOption_OplogReplay ) ) {
  641. BSONObj current = cursor->current();
  642. BSONElement e = current["ts"];
  643. if ( e.type() == Date || e.type() == Timestamp ) {
  644. slaveReadTill = e._opTime();
  645. }
  646. }
  // Without getMore support (or for explain) everything must be gathered in this pass.
  647. if ( !cursor->supportGetMore() || pq.isExplain() ) {
  648. if ( queryResponseBuilder->enoughTotalResults() ) {
  649. break;
  650. }
  651. }
  652. else if ( queryResponseBuilder->enoughForFirstBatch() ) {
  653. // if only 1 requested, no cursor saved for efficiency...we assume it is findOne()
  654. if ( pq.wantMore() && pq.getNumToReturn() != 1 ) {
  655. queryResponseBuilder->finishedFirstBatch();
  656. if ( cursor->advance() ) {
  657. saveClientCursor = true;
  658. }
  659. }
  660. break;
  661. }
  662. }
  // 'cursor' is empty here if it was reset inside the loop above.
  663. if ( cursor ) {
  664. if ( pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1 ) {
  665. cursor->setTailable();
  666. }
  667. // If the tailing request succeeded.
  668. if ( cursor->tailable() ) {
  669. saveClientCursor = true;
  670. }
  671. }
  672. if ( ! shardingState.getVersion( ns ).isWriteCompatibleWith( shardingVersionAtStart ) ) {
  673. // if the version changed during the query
  674. // we might be missing some data
  675. // and its safe to send this as mongos can resend
  676. // at this point
  677. throw SendStaleConfigException( ns , "version changed during initial query", shardingVersionAtStart, shardingState.getVersion( ns ) );
  678. }
  // Drop the retryable page fault section and install NoPageFaultsAllowed before handoff().
  679. parentPageFaultSection.reset(0);
  680. noPageFault.reset( new NoPageFaultsAllowed() );
  681. int nReturned = queryResponseBuilder->handoff( result );
  682. ccPointer.reset();
  683. long long cursorid = 0;
  684. if ( saveClientCursor ) {
  685. // Create a new ClientCursor, with a default timeout.
  686. ccPointer.reset( new ClientCursor( queryOptions, cursor, ns,
  687. jsobj.getOwned() ) );
  688. cursorid = ccPointer->cursorid();
  689. DEV tlog(2) << "query has more, cursorid: " << cursorid << endl;
  690. if ( cursor->supportYields() ) {
  691. ClientCursor::YieldData data;
  692. ccPointer->prepareToYield( data );
  693. }
  694. else {
  695. ccPointer->c()->noteLocation();
  696. }
  697. // Save slave's position in the oplog.
  698. if ( pq.hasOption( QueryOption_OplogReplay ) && !slaveReadTill.isNull() ) {
  699. ccPointer->slaveReadTill( slaveReadTill );
  700. }
  701. if ( !ccPointer->ok() && ccPointer->c()->tailable() ) {
  702. DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
  703. }
  704. if( queryOptions & QueryOption_Exhaust ) {
  705. curop.debug().exhaust = true;
  706. }
  707. // Set attributes for getMore.
  708. ccPointer->setChunkManager( queryResponseBuilder->chunkManager() );
  709. ccPointer->setPos( nReturned );
  710. ccPointer->pq = pq_shared;
  711. ccPointer->fields = pq.getFieldPtr();
  // Release the holder without destroying the ClientCursor created above.
  712. ccPointer.release();
  713. }
  // Fill in the reply header and the op's debug counters.
  714. QueryResult *qr = (QueryResult *) result.header();
  715. qr->cursorId = cursorid;
  716. curop.debug().cursorid = ( cursorid == 0 ? -1 : qr->cursorId );
  717. qr->setResultFlagsToOk();
  718. // qr->len is updated automatically by appendData()
  719. curop.debug().responseLength = qr->len;
  720. qr->setOperation(opReply);
  721. qr->startingFrom = 0;
  722. qr->nReturned = nReturned;
  723. curop.debug().nscanned = ( cursor ? cursor->nscanned() : 0LL );
  724. curop.debug().ntoskip = pq.getSkip();
  725. curop.debug().nreturned = nReturned;
  726. return curop.debug().exhaust ? ns : "";
  727. }
  /**
   * Tries to answer a query through Helpers::findById().
   *
   * Returns false, leaving 'result' untouched, when the namespace was found but no
   * index was (nsFound && !indexFound), so that the caller can fall back to another
   * path. Otherwise 'result' receives a reply with zero or one document and true is
   * returned. A document that the shard chunk manager says does not belong here is
   * treated as not found.
   *
   * A PageFaultException inside the loop is touch()ed and the lookup is retried.
   */
  728. bool queryIdHack( const char* ns, const BSONObj& query, const ParsedQuery& pq, CurOp& curop, Message& result ) {
  729. // notes:
  730. // do not touch result inside of PageFaultRetryableSection area
  731. Client& currentClient = cc(); // only here since its safe and takes time
  732. auto_ptr< QueryResult > qr;
  733. {
  734. // this extra bracing is not strictly needed
  735. // but makes it clear what the rules are in different spots
  736. scoped_ptr<PageFaultRetryableSection> pgfs;
  // Only open a new retryable section when the client does not already have one.
  737. if ( ! currentClient.getPageFaultRetryableSection() )
  738. pgfs.reset( new PageFaultRetryableSection() );
  739. while ( 1 ) {
  740. try {
  741. int n = 0;
  742. bool nsFound = false;
  743. bool indexFound = false;
  744. BSONObj resObject; // put inside since we don't own the memory
  745. Client::ReadContext ctx( ns , dbpath ); // read locks
  746. replVerifyReadsOk(&pq);
  747. bool found = Helpers::findById( currentClient, ns, query, resObject, &nsFound, &indexFound );
  748. if ( nsFound && ! indexFound ) {
  749. // we have to resort to a table scan
  750. return false;
  751. }
  752. if ( shardingState.needShardChunkManager( ns ) ) {
  753. ShardChunkManagerPtr m = shardingState.getShardChunkManager( ns );
  754. if ( m && ! m->belongsToMe( resObject ) ) {
  755. // I have something with this _id
  756. // but it doesn't belong to me
  757. // so return nothing
  758. resObject = BSONObj();
  759. found = false;
  760. }
  761. }
  // Buffer sized for the header, the result object and 32 extra bytes.
  762. BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32);
  763. bb.skip(sizeof(QueryResult));
  764. curop.debug().idhack = true;
  765. if ( found ) {
  766. n = 1;
  767. fillQueryResultFromObj( bb , pq.getFields() , resObject );
  768. }
  769. qr.reset( (QueryResult *) bb.buf() );
  770. bb.decouple();
  771. qr->setResultFlagsToOk();
  772. qr->len = bb.len();
  773. curop.debug().responseLength = bb.len();
  774. qr->setOperation(opReply);
  775. qr->cursorId = 0;
  776. qr->startingFrom = 0;
  777. qr->nReturned = n;
  778. break;
  779. }
  780. catch ( PageFaultException& e ) {
  // touch() the fault, then go around the loop again.
  781. e.touch();
  782. }
  783. }
  784. }
  // Outside the retryable section: now it is safe to hand the reply to 'result'.
  785. result.setData( qr.release(), true );
  786. return true;
  787. }
  788. /**
  789. * Run a query -- includes checking for and running a Command.
  790. * @return the ns if exhaust mode; an empty string in normal mode
  791. * @locks the db mutex for reading (and potentially for writing temporarily to create a new db).
  792. * @yields the db mutex periodically after acquiring it.
  793. * @asserts on scan and order memory exhaustion and other cases.
  794. */
  795. string runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
  796. shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
  797. ParsedQuery& pq( *pq_shared );
  798. BSONObj jsobj = q.query;
  799. int queryOptions = q.queryOptions;
  800. const char *ns = q.ns;
  801. uassert( 16332 , "can't have an empty ns" , ns[0] );
  802. if( logLevel >= 2 )
  803. log() << "runQuery called " << ns << " " << jsobj << endl;
  804. curop.debug().ns = ns;
  805. curop.debug().ntoreturn = pq.getNumToReturn();
  806. curop.debug().query = jsobj;
  807. curop.setQuery(jsobj);
  808. const NamespaceString nsString( ns );
  809. uassert( 16256, str::stream() << "Invalid ns [" << ns << "]", nsString.isValid() );
  810. // Run a command.
  811. if ( pq.couldBeCommand() ) {
  812. curop.markCommand();
  813. BufBuilder bb;
  814. bb.skip(sizeof(QueryResult));
  815. BSONObjBuilder cmdResBuf;
  816. if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
  817. curop.debug().iscommand = true;
  818. curop.debug().query = jsobj;
  819. auto_ptr< QueryResult > qr;
  820. qr.reset( (QueryResult *) bb.buf() );
  821. bb.decouple();
  822. qr->setResultFlagsToOk();
  823. qr->len = bb.len();
  824. curop.debug().responseLength = bb.len();
  825. qr->setOperation(opReply);
  826. qr->cursorId = 0;
  827. qr->startingFrom = 0;
  828. qr->nReturned = 1;
  829. result.setData( qr.release(), true );
  830. }
  831. else {
  832. uasserted(13530, "bad or malformed command request?");
  833. }
  834. return "";
  835. }
  836. bool explain = pq.isExplain();
  837. BSONObj order = pq.getOrder();
  838. BSONObj query = pq.getFilter();
  839. /* The ElemIter will not be happy if this isn't really an object. So throw exception
  840. here when that is true.
  841. (Which may indicate bad data from client.)
  842. */
  843. if ( query.objsize() == 0 ) {
  844. out() << "Bad query object?\n jsobj:";
  845. out() << jsobj.toString() << "\n query:";
  846. out() << query.toString() << endl;
  847. uassert( 10110 , "bad query object", false);
  848. }
  849. // Run a simple id query.
  850. if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
  851. if ( queryIdHack( ns, query, pq, curop, result ) ) {
  852. return "";
  853. }
  854. }
  855. // sanity check the query and projection
  856. if ( pq.getFields() != NULL )
  857. pq.getFields()->validateQuery( query );
  858. // these now may stored in a ClientCursor or somewhere else,
  859. // so make sure we use a real copy
  860. jsobj = jsobj.getOwned();
  861. query = query.getOwned();
  862. order = order.getOwned();
  863. bool hasRetried = false;
  864. scoped_ptr<PageFaultRetryableSection> pgfs;
  865. scoped_ptr<NoPageFaultsAllowed> npfe;
  866. while ( 1 ) {
  867. if ( ! cc().getPageFaultRetryableSection() ) {
  868. verify( ! pgfs );
  869. pgfs.reset( new PageFaultRetryableSection() );
  870. }
  871. try {
  872. Client::ReadContext ctx( ns , dbpath ); // read locks
  873. const ConfigVersion shardingVersionAtStart = shardingState.getVersion( ns );
  874. replVerifyReadsOk(&pq);
  875. if ( pq.hasOption( QueryOption_CursorTailable ) ) {
  876. NamespaceDetails *d = nsdetails( ns );
  877. uassert( 13051, "tailable cursor requested on non capped collection", d && d->isCapped() );
  878. const BSONObj nat1 = BSON( "$natural" << 1 );
  879. if ( order.isEmpty() ) {
  880. order = nat1;
  881. }
  882. else {
  883. uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
  884. }
  885. }
  886. // Run a regular query.
  887. BSONObj oldPlan;
  888. if ( ! hasRetried && explain && ! pq.hasIndexSpecifier() ) {
  889. scoped_ptr<MultiPlanScanner> mps( MultiPlanScanner::make( ns, query, order ) );
  890. oldPlan = mps->cachedPlanExplainSummary();
  891. }
  892. return queryWithQueryOptimizer( queryOptions, ns, jsobj, curop, query, order,
  893. pq_shared, oldPlan, shardingVersionAtStart,
  894. pgfs, npfe, result );
  895. }
  896. catch ( PageFaultException& e ) {
  897. e.touch();
  898. }
  899. catch ( const QueryRetryException & ) {
  900. // In some cases the query may be retried if there is an in memory sort size assertion.
  901. verify( ! hasRetried );
  902. hasRetried = true;
  903. }
  904. }
  905. }
  906. } // namespace mongo