
/apps/couch/src/couch_db_updater.erl

http://github.com/cloudant/bigcouch

% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_db_updater).
-behaviour(gen_server).

-export([btree_by_id_split/1, btree_by_id_join/2, btree_by_id_reduce/2]).
-export([btree_by_seq_split/1, btree_by_seq_join/2, btree_by_seq_reduce/2]).
-export([init/1, terminate/2, handle_call/3, handle_cast/2, code_change/3, handle_info/2]).

-include("couch_db.hrl").
init({DbName, Filepath, Fd, Options}) ->
    case lists:member(create, Options) of
    true ->
        % create a new header and write it to the file
        Header = #db_header{},
        ok = couch_file:write_header(Fd, Header),
        % delete any old compaction files that might be hanging around
        RootDir = couch_config:get("couchdb", "database_dir", "."),
        couch_file:delete(RootDir, Filepath ++ ".compact");
    false ->
        ok = couch_file:upgrade_old_header(Fd, <<$g, $m, $k, 0>>), % 09 UPGRADE CODE
        case couch_file:read_header(Fd) of
        {ok, Header} ->
            ok;
        no_valid_header ->
            % create a new header and write it to the file
            Header = #db_header{},
            ok = couch_file:write_header(Fd, Header),
            % delete any old compaction files that might be hanging around
            file:delete(Filepath ++ ".compact")
        end
    end,
    Db = init_db(DbName, Filepath, Fd, Header),
    couch_stats_collector:track_process_count({couchdb, open_databases}),
    % we don't load validation funs here because the fabric query is liable to
    % race conditions. Instead see couch_db:validate_doc_update, which loads
    % them lazily
    {ok, Db#db{main_pid = self(), is_sys_db = lists:member(sys_db, Options)}}.
terminate(_Reason, Db) ->
    ok = couch_file:close(Db#db.fd),
    couch_util:shutdown_sync(Db#db.compactor_pid),
    couch_util:shutdown_sync(Db#db.fd),
    ok.

handle_call(start_compact, _From, Db) ->
    {noreply, NewDb} = handle_cast(start_compact, Db),
    {reply, {ok, NewDb#db.compactor_pid}, NewDb};
handle_call(get_db, _From, Db) ->
    {reply, {ok, Db}, Db};
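% full_commit comes in two shapes, with and without a RequiredSeq. If no
% delayed commit is pending there is nothing to flush; if RequiredSeq is
% already covered by committed_update_seq the call is a no-op; otherwise
% the data is committed now.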
handle_call(full_commit, _From, #db{waiting_delayed_commit=nil}=Db) ->
    {reply, ok, Db};
handle_call(full_commit, _From, Db) ->
    {reply, ok, commit_data(Db)};
handle_call({full_commit, _}, _From, #db{waiting_delayed_commit=nil}=Db) ->
    {reply, ok, Db}; % no data waiting, return ok immediately
handle_call({full_commit, RequiredSeq}, _From, Db)
        when RequiredSeq =< Db#db.committed_update_seq ->
    {reply, ok, Db};
handle_call({full_commit, _}, _, Db) ->
    {reply, ok, commit_data(Db)}; % commit the data and return ok
handle_call(increment_update_seq, _From, Db) ->
    Db2 = commit_data(Db#db{update_seq=Db#db.update_seq+1}),
    ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
    couch_db_update_notifier:notify({updated, Db#db.name}),
    {reply, {ok, Db2#db.update_seq}, Db2};
handle_call({set_security, NewSec}, _From, Db) ->
    {ok, Ptr} = couch_file:append_term(Db#db.fd, NewSec),
    Db2 = commit_data(Db#db{security=NewSec, security_ptr=Ptr,
            update_seq=Db#db.update_seq+1}),
    ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
    {reply, ok, Db2};
handle_call({set_revs_limit, Limit}, _From, Db) ->
    Db2 = commit_data(Db#db{revs_limit=Limit,
            update_seq=Db#db.update_seq+1}),
    ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
    {reply, ok, Db2};
handle_call({purge_docs, _IdRevs}, _From,
        #db{compactor_pid=Pid}=Db) when Pid /= nil ->
    {reply, {error, purge_during_compaction}, Db};
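% purge: remove the given leaf revisions from each doc's rev tree, drop any
% docs whose trees become empty, renumber the surviving leaves with fresh
% update seqs, and record the purged id/rev pairs behind header.purged_docs.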
handle_call({purge_docs, IdRevs}, _From, Db) ->
    #db{
        fd = Fd,
        id_tree = DocInfoByIdBTree,
        seq_tree = DocInfoBySeqBTree,
        update_seq = LastSeq,
        header = Header = #db_header{purge_seq=PurgeSeq}
    } = Db,
    DocLookups = couch_btree:lookup(DocInfoByIdBTree,
            [Id || {Id, _Revs} <- IdRevs]),
    NewDocInfos = lists:zipwith(
        fun({_Id, Revs}, {ok, #full_doc_info{rev_tree=Tree}=FullDocInfo}) ->
            case couch_key_tree:remove_leafs(Tree, Revs) of
            {_, []=_RemovedRevs} -> % no change
                nil;
            {NewTree, RemovedRevs} ->
                {FullDocInfo#full_doc_info{rev_tree=NewTree}, RemovedRevs}
            end;
        (_, not_found) ->
            nil
        end,
        IdRevs, DocLookups),
    SeqsToRemove = [Seq
            || {#full_doc_info{update_seq=Seq},_} <- NewDocInfos],
    FullDocInfoToUpdate = [FullInfo
            || {#full_doc_info{rev_tree=Tree}=FullInfo,_}
            <- NewDocInfos, Tree /= []],
    IdRevsPurged = [{Id, Revs}
            || {#full_doc_info{id=Id}, Revs} <- NewDocInfos],
    {DocInfoToUpdate, NewSeq} = lists:mapfoldl(
        fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) ->
            Tree2 = couch_key_tree:map_leafs(
                fun(_RevId, {IsDeleted, BodyPointer, _UpdateSeq}) ->
                    {IsDeleted, BodyPointer, SeqAcc + 1}
                end, Tree),
            {couch_doc:to_doc_info(FullInfo#full_doc_info{rev_tree=Tree2}),
                SeqAcc + 1}
        end, LastSeq, FullDocInfoToUpdate),
    IdsToRemove = [Id || {#full_doc_info{id=Id,rev_tree=[]},_}
            <- NewDocInfos],
    {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree,
            DocInfoToUpdate, SeqsToRemove),
    {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree,
            FullDocInfoToUpdate, IdsToRemove),
    {ok, Pointer} = couch_file:append_term(Fd, IdRevsPurged),
    Db2 = commit_data(
        Db#db{
            id_tree = DocInfoByIdBTree2,
            seq_tree = DocInfoBySeqBTree2,
            update_seq = NewSeq + 1,
            header = Header#db_header{purge_seq=PurgeSeq+1, purged_docs=Pointer}}),
    ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
    couch_db_update_notifier:notify({updated, Db#db.name}),
    {reply, {ok, (Db2#db.header)#db_header.purge_seq, IdRevsPurged}, Db2}.

handle_cast({load_validation_funs, ValidationFuns}, Db) ->
    Db2 = Db#db{validate_doc_funs = ValidationFuns},
    ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
    {noreply, Db2};
handle_cast(start_compact, Db) ->
    case Db#db.compactor_pid of
    nil ->
        ?LOG_INFO("Starting compaction for db \"~s\"", [Db#db.name]),
        Pid = spawn_link(fun() -> start_copy_compact(Db) end),
        Db2 = Db#db{compactor_pid=Pid},
        ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
        {noreply, Db2};
    _ ->
        % compaction currently running, this is a no-op
        {noreply, Db}
    end;
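% The compactor sends compact_done when it has caught up. If the main file
% gained updates in the meantime, compaction is restarted; otherwise the
% _local docs are copied over and the compact file is swapped into place.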
handle_cast({compact_done, CompactFilepath}, #db{filepath=Filepath}=Db) ->
    {ok, NewFd} = couch_file:open(CompactFilepath),
    {ok, NewHeader} = couch_file:read_header(NewFd),
    #db{update_seq=NewSeq} = NewDb =
        init_db(Db#db.name, Filepath, NewFd, NewHeader),
    unlink(NewFd),
    case Db#db.update_seq == NewSeq of
    true ->
        % suck up all the local docs into memory and write them to the new db
        {ok, _, LocalDocs} = couch_btree:foldl(Db#db.local_tree,
            fun(Value, _Offset, Acc) -> {ok, [Value | Acc]} end, []),
        {ok, NewLocalBtree} = couch_btree:add(NewDb#db.local_tree, LocalDocs),
        NewDb2 = commit_data(NewDb#db{
            local_tree = NewLocalBtree,
            main_pid = self(),
            filepath = Filepath,
            instance_start_time = Db#db.instance_start_time,
            revs_limit = Db#db.revs_limit
        }),
        ?LOG_DEBUG("CouchDB swapping files ~s and ~s.",
                [Filepath, CompactFilepath]),
        RootDir = couch_config:get("couchdb", "database_dir", "."),
        couch_file:delete(RootDir, Filepath),
        ok = file:rename(CompactFilepath, Filepath),
        close_db(Db),
        NewDb3 = refresh_validate_doc_funs(NewDb2),
        ok = gen_server:call(couch_server, {db_updated, NewDb3}, infinity),
        couch_db_update_notifier:notify({compacted, NewDb3#db.name}),
        ?LOG_INFO("Compaction for db \"~s\" completed.", [Db#db.name]),
        {noreply, NewDb3#db{compactor_pid=nil}};
    false ->
        ?LOG_INFO("Compaction for ~s still behind main file "
                "(update seq=~p. compact update seq=~p). Retrying.",
                [Db#db.name, Db#db.update_seq, NewSeq]),
        close_db(NewDb),
        Pid = spawn_link(fun() -> start_copy_compact(Db) end),
        Db2 = Db#db{compactor_pid=Pid},
        {noreply, Db2}
    end.

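% update_docs arrives as a plain message rather than a call so the updater
% can batch: when the request carries no _local docs, collect_updates/4
% drains any further pending update_docs messages from the mailbox and
% folds them into a single btree write.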
handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts,
        FullCommit}, Db) ->
    GroupedDocs2 = [[{Client, D} || D <- DocGroup] || DocGroup <- GroupedDocs],
    if NonRepDocs == [] ->
        {GroupedDocs3, Clients, FullCommit2} = collect_updates(GroupedDocs2,
                [Client], MergeConflicts, FullCommit);
    true ->
        GroupedDocs3 = GroupedDocs2,
        FullCommit2 = FullCommit,
        Clients = [Client]
    end,
    NonRepDocs2 = [{Client, NRDoc} || NRDoc <- NonRepDocs],
    try update_docs_int(Db, GroupedDocs3, NonRepDocs2, MergeConflicts,
            FullCommit2) of
    {ok, Db2} ->
        ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
        if Db2#db.update_seq /= Db#db.update_seq ->
            couch_db_update_notifier:notify({updated, Db2#db.name});
        true -> ok
        end,
        [catch(ClientPid ! {done, self()}) || ClientPid <- Clients],
        {noreply, Db2}
    catch
        throw:retry ->
            [catch(ClientPid ! {retry, self()}) || ClientPid <- Clients],
            {noreply, Db}
    end;
handle_info(delayed_commit, #db{waiting_delayed_commit=nil}=Db) ->
    % no outstanding delayed commits, ignore
    {noreply, Db};
handle_info(delayed_commit, Db) ->
    case commit_data(Db) of
    Db ->
        {noreply, Db};
    Db2 ->
        ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
        {noreply, Db2}
    end;
handle_info({'EXIT', _Pid, normal}, Db) ->
    {noreply, Db};
handle_info({'EXIT', _Pid, Reason}, Db) ->
    {stop, Reason, Db};
handle_info({'DOWN', Ref, _, _, Reason}, #db{fd_monitor=Ref, name=Name} = Db) ->
    ?LOG_ERROR("DB ~s shutting down - Fd ~p", [Name, Reason]),
    {stop, normal, Db}.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

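% merge_updates/3 merges two lists of doc groups, each sorted by doc id,
% concatenating groups that share an id. Illustrative sketch with
% hypothetical clients C1/C2 and docs DocA/DocB:
%   merge_updates([[{C1,DocA}]], [[{C2,DocA}], [{C2,DocB}]], [])
%   -> [[{C1,DocA},{C2,DocA}], [{C2,DocB}]]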
merge_updates([], RestB, AccOutGroups) ->
    lists:reverse(AccOutGroups, RestB);
merge_updates(RestA, [], AccOutGroups) ->
    lists:reverse(AccOutGroups, RestA);
merge_updates([[{_, #doc{id=IdA}}|_]=GroupA | RestA],
        [[{_, #doc{id=IdB}}|_]=GroupB | RestB], AccOutGroups) ->
    if IdA == IdB ->
        merge_updates(RestA, RestB, [GroupA ++ GroupB | AccOutGroups]);
    IdA < IdB ->
        merge_updates(RestA, [GroupB | RestB], [GroupA | AccOutGroups]);
    true ->
        merge_updates([GroupA | RestA], RestB, [GroupB | AccOutGroups])
    end.

collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
    receive
    % Only collect updates with the same MergeConflicts flag and without
    % local docs. It's easier to just avoid multiple _local doc
    % updaters than to deal with their possible conflicts, and local doc
    % writes are relatively rare. Can be optimized later if really needed.
    {update_docs, Client, GroupedDocs, [], MergeConflicts, FullCommit2} ->
        GroupedDocs2 = [[{Client, Doc} || Doc <- DocGroup]
                || DocGroup <- GroupedDocs],
        GroupedDocsAcc2 =
            merge_updates(GroupedDocsAcc, GroupedDocs2, []),
        collect_updates(GroupedDocsAcc2, [Client | ClientsAcc],
                MergeConflicts, (FullCommit or FullCommit2))
    after 0 ->
        {GroupedDocsAcc, ClientsAcc, FullCommit}
    end.

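% On disk each rev tree leaf is stored as a plain tuple; in memory it is a
% #leaf{} record. rev_tree/1 and disk_tree/1 convert between the two forms,
% with the deleted flag encoded as 1/0 on disk. The 3-tuple clause handles
% leaves written before per-leaf size information was tracked.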
rev_tree(DiskTree) ->
    couch_key_tree:map(fun(_RevId, {Del, Ptr, Seq}) ->
        #leaf{deleted=(Del==1), ptr=Ptr, seq=Seq};
    (_RevId, {Del, Ptr, Seq, Size, Atts}) ->
        #leaf{deleted=(Del==1), ptr=Ptr, seq=Seq, size=Size, atts=Atts};
    (_RevId, ?REV_MISSING) ->
        ?REV_MISSING
    end, DiskTree).

disk_tree(RevTree) ->
    couch_key_tree:map(fun(_RevId, ?REV_MISSING) ->
        ?REV_MISSING;
    (_RevId, #leaf{deleted=Del, ptr=Ptr, seq=Seq, size=Size, atts=Atts}) ->
        {if Del -> 1; true -> 0 end, Ptr, Seq, Size, Atts}
    end, RevTree).

btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) ->
    {Seq, {Id, if Del -> 1; true -> 0 end, disk_tree(T)}}.

btree_by_seq_join(Seq, {Id, Del, T}) when is_integer(Del) ->
    #full_doc_info{id=Id, update_seq=Seq, deleted=Del==1, rev_tree=rev_tree(T)};
btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
    % 1.0 stored #doc_info records in the seq tree. compact to upgrade.
    #doc_info{
        id = Id,
        high_seq = KeySeq,
        revs =
            [#rev_info{rev=Rev,seq=Seq,deleted=false,body_sp=Bp} ||
                {Rev, Seq, Bp} <- RevInfos] ++
            [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp=Bp} ||
                {Rev, Seq, Bp} <- DeletedRevInfos]};
btree_by_seq_join(KeySeq, {Id, Rev, Bp, Conflicts, DelConflicts, Deleted}) ->
    % 09 UPGRADE CODE
    % this is the 0.9.0 and earlier by_seq record. It's missing the body pointers
    % and individual seq nums for conflicts that are currently in the index,
    % meaning the filtered _changes api will not work except on main docs.
    % Simply compact a 0.9.0 database to upgrade the index.
    #doc_info{
        id = Id,
        high_seq = KeySeq,
        revs = [#rev_info{rev=Rev,seq=KeySeq,deleted=Deleted,body_sp=Bp}] ++
            [#rev_info{rev=Rev1,seq=KeySeq,deleted=false} || Rev1 <- Conflicts] ++
            [#rev_info{rev=Rev2,seq=KeySeq,deleted=true} || Rev2 <- DelConflicts]}.

btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
        data_size=Size, deleted=Deleted, rev_tree=Tree}) ->
    {Id, {Seq, if Deleted -> 1; true -> 0 end, Size, disk_tree(Tree)}}.

%% handle old formats from before `data_size` was added
btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
    btree_by_id_join(Id, {HighSeq, Deleted, 0, DiskTree});
btree_by_id_join(Id, {HighSeq, Deleted, Size, DiskTree}) ->
    #full_doc_info{id=Id, update_seq=HighSeq,
        deleted=Deleted==1, data_size=Size,
        rev_tree=rev_tree(DiskTree)}.

btree_by_id_reduce(reduce, FullDocInfos) ->
    lists:foldl(
        fun(#full_doc_info{deleted = false, data_size=Size},
                {NotDeleted, Deleted, DocSize}) ->
            {NotDeleted + 1, Deleted, DocSize + Size};
        (#full_doc_info{deleted = true, data_size=Size},
                {NotDeleted, Deleted, DocSize}) ->
            {NotDeleted, Deleted + 1, DocSize + Size}
        end,
        {0, 0, 0}, FullDocInfos);
btree_by_id_reduce(rereduce, Reductions) ->
    lists:foldl(
        fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, AccDocSizes}) ->
            {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccDocSizes};
        ({NotDeleted, Deleted, DocSizes}, {AccNotDeleted, AccDeleted, AccDocSizes}) ->
            {AccNotDeleted + NotDeleted, AccDeleted + Deleted, DocSizes + AccDocSizes}
        end,
        {0, 0, 0}, Reductions).

btree_by_seq_reduce(reduce, DocInfos) ->
    % count the number of documents
    length(DocInfos);
btree_by_seq_reduce(rereduce, Reds) ->
    lists:sum(Reds).

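% simple_upgrade_record/2 pads a record tuple from an older release out to
% the current size, taking default values for the new trailing fields from
% a freshly built record. Illustrative sketch with a hypothetical record:
%   simple_upgrade_record({hdr, 1}, {hdr, 1, nil}) -> {hdr, 1, nil}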
simple_upgrade_record(Old, New) when tuple_size(Old) =:= tuple_size(New) ->
    Old;
simple_upgrade_record(Old, New) when tuple_size(Old) < tuple_size(New) ->
    OldSz = tuple_size(Old),
    NewValuesTail =
        lists:sublist(tuple_to_list(New), OldSz + 1, tuple_size(New) - OldSz),
    list_to_tuple(tuple_to_list(Old) ++ NewValuesTail).

init_db(DbName, Filepath, Fd, Header0) ->
    Header1 = simple_upgrade_record(Header0, #db_header{}),
    Header =
    case element(2, Header1) of
    1 -> Header1#db_header{unused = 0, security_ptr = nil}; % 0.9
    2 -> Header1#db_header{unused = 0, security_ptr = nil}; % post 0.9 and pre 0.10
    3 -> Header1#db_header{security_ptr = nil}; % post 0.9 and pre 0.10
    4 -> Header1#db_header{security_ptr = nil}; % 0.10 and pre 0.11
    ?LATEST_DISK_VERSION -> Header1;
    _ -> throw({database_disk_version_error, "Incorrect disk header version"})
    end,
    {ok, FsyncOptions} = couch_util:parse_term(
            couch_config:get("couchdb", "fsync_options",
                    "[before_header, after_header, on_file_open]")),
    case lists:member(on_file_open, FsyncOptions) of
    true -> ok = couch_file:sync(Filepath);
    _ -> ok
    end,
    {ok, IdBtree} = couch_btree:open(Header#db_header.id_tree_state, Fd,
        [{split, fun ?MODULE:btree_by_id_split/1},
        {join, fun ?MODULE:btree_by_id_join/2},
        {reduce, fun ?MODULE:btree_by_id_reduce/2}]),
    {ok, SeqBtree} = couch_btree:open(Header#db_header.seq_tree_state, Fd,
        [{split, fun ?MODULE:btree_by_seq_split/1},
        {join, fun ?MODULE:btree_by_seq_join/2},
        {reduce, fun ?MODULE:btree_by_seq_reduce/2}]),
    {ok, LocalDocsBtree} = couch_btree:open(Header#db_header.local_tree_state, Fd),
    case Header#db_header.security_ptr of
    nil ->
        Security = [],
        SecurityPtr = nil;
    SecurityPtr ->
        {ok, Security} = couch_file:pread_term(Fd, SecurityPtr)
    end,
    % convert start time tuple to microsecs and store as a binary string
    {MegaSecs, Secs, MicroSecs} = now(),
    StartTime = ?l2b(io_lib:format("~p",
            [(MegaSecs*1000000*1000000) + (Secs*1000000) + MicroSecs])),
    #db{
        fd = Fd,
        fd_monitor = erlang:monitor(process, Fd),
        header = Header,
        id_tree = IdBtree,
        seq_tree = SeqBtree,
        local_tree = LocalDocsBtree,
        committed_update_seq = Header#db_header.update_seq,
        update_seq = Header#db_header.update_seq,
        name = DbName,
        filepath = Filepath,
        security = Security,
        security_ptr = SecurityPtr,
        instance_start_time = StartTime,
        revs_limit = Header#db_header.revs_limit,
        fsync_options = FsyncOptions
    }.

close_db(#db{fd_monitor = Ref}) ->
    erlang:demonitor(Ref).

refresh_validate_doc_funs(#db{name = <<"shards/", _/binary>> = Name} = Db) ->
    spawn(fabric, reset_validation_funs, [mem3:dbname(Name)]),
    Db#db{validate_doc_funs = undefined};
refresh_validate_doc_funs(Db) ->
    {ok, DesignDocs} = couch_db:get_design_docs(Db),
    ProcessDocFuns = lists:flatmap(
        fun(DesignDoc) ->
            case couch_doc:get_validate_doc_fun(DesignDoc) of
            nil -> [];
            Fun -> [Fun]
            end
        end, DesignDocs),
    Db#db{validate_doc_funs=ProcessDocFuns}.

% rev tree functions
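% flush_trees/3 walks each updated rev tree and writes any in-memory
% document summaries (#doc{} values in the leaves) to the database file,
% replacing them with #leaf{} records that point at the data on disk.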
flush_trees(_Db, [], AccFlushedTrees) ->
    {ok, lists:reverse(AccFlushedTrees)};
flush_trees(#db{fd=Fd,header=Header}=Db,
        [InfoUnflushed | RestUnflushed], AccFlushed) ->
    #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} = InfoUnflushed,
    Flushed = couch_key_tree:map(
        fun(_Rev, Value) ->
            case Value of
            #doc{atts=Atts,deleted=IsDeleted}=Doc ->
                % this node value is actually an unwritten document summary,
                % write it to disk.
                % make sure the Fd in the written bins is the same Fd we are
                % using, and convert bins, removing the Fd.
                % All bins should have been written to disk already.
                {DiskAtts, SizeInfo} =
                case Atts of
                [] -> {[], []};
                [#att{data={BinFd, _Sp}} | _] when BinFd == Fd ->
                    {[{N,T,P,AL,DL,R,M,E}
                        || #att{name=N,type=T,data={_,P},md5=M,revpos=R,
                                att_len=AL,disk_len=DL,encoding=E}
                        <- Atts],
                    [{P1,AL1}
                        || #att{data={_,P1},att_len=AL1}
                        <- Atts]};
                _ ->
                    % BinFd must not equal our Fd. This can happen when a
                    % database is being switched out during a compaction.
                    ?LOG_DEBUG("File where the attachments are written has"
                            " changed. Possibly retrying.", []),
                    throw(retry)
                end,
                {ok, NewSummaryPointer} =
                case Header#db_header.disk_version < 4 of
                true ->
                    {ok, _} = couch_file:append_term(Fd, {Doc#doc.body, DiskAtts});
                false ->
                    {ok, _} = couch_file:append_term_md5(Fd, {Doc#doc.body, DiskAtts})
                end,
                #leaf{
                    deleted = IsDeleted,
                    ptr = NewSummaryPointer,
                    seq = UpdateSeq,
                    size = size(term_to_binary(Doc#doc.body)),
                    atts = SizeInfo
                };
            _ ->
                Value
            end
        end, Unflushed),
    flush_trees(Db, RestUnflushed,
        [InfoUnflushed#full_doc_info{rev_tree=Flushed} | AccFlushed]).

send_result(Client, Id, OriginalRevs, NewResult) ->
    % used to send a result to the client
    catch(Client ! {result, self(), {{Id, OriginalRevs}, NewResult}}).

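% merge_rev_trees/7 folds each group of incoming edits into the stored rev
% tree. With MergeConflicts false (normal interactive updates) an edit that
% would introduce a conflict is rejected and the client is sent conflict;
% with MergeConflicts true (replicated edits) conflicting branches are
% merged in unconditionally. Docs whose trees changed get a new update seq.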
merge_rev_trees(_Limit, _Merge, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) ->
    {ok, lists:reverse(AccNewInfos), AccRemoveSeqs, AccSeq};
merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
        [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) ->
    #full_doc_info{id=Id,rev_tree=OldTree,deleted=OldDeleted,update_seq=OldSeq}
            = OldDocInfo,
    NewRevTree = lists:foldl(
        fun({Client, #doc{revs={Pos,[_Rev|PrevRevs]}}=NewDoc}, AccTree) ->
            if not MergeConflicts ->
                case couch_key_tree:merge(AccTree, couch_db:doc_to_tree(NewDoc),
                        Limit) of
                {_NewTree, conflicts} when (not OldDeleted) ->
                    send_result(Client, Id, {Pos-1,PrevRevs}, conflict),
                    AccTree;
                {NewTree, conflicts} when PrevRevs /= [] ->
                    % if a previous revision was specified, check that it is
                    % a leaf node in the tree
                    Leafs = couch_key_tree:get_all_leafs(AccTree),
                    IsPrevLeaf = lists:any(fun({_, {LeafPos, [LeafRevId|_]}}) ->
                        {LeafPos, LeafRevId} == {Pos-1, hd(PrevRevs)}
                    end, Leafs),
                    if IsPrevLeaf ->
                        NewTree;
                    true ->
                        send_result(Client, Id, {Pos-1,PrevRevs}, conflict),
                        AccTree
                    end;
                {NewTree, no_conflicts} when AccTree == NewTree ->
                    % the tree didn't change at all,
                    % meaning we are saving a rev that's already
                    % been edited again.
                    if (Pos == 1) and OldDeleted ->
                        % this means we are recreating a brand new document
                        % into a state that already existed before.
                        % put the rev into a subsequent edit of the deletion
                        #doc_info{revs=[#rev_info{rev={OldPos,OldRev}}|_]} =
                            couch_doc:to_doc_info(OldDocInfo),
                        NewRevId = couch_db:new_revid(
                            NewDoc#doc{revs={OldPos, [OldRev]}}),
                        NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}},
                        {NewTree2, _} = couch_key_tree:merge(AccTree,
                            couch_db:doc_to_tree(NewDoc2), Limit),
                        % we changed the rev id, this tells the caller we did
                        send_result(Client, Id, {Pos-1,PrevRevs},
                            {ok, {OldPos + 1, NewRevId}}),
                        NewTree2;
                    true ->
                        send_result(Client, Id, {Pos-1,PrevRevs}, conflict),
                        AccTree
                    end;
                {NewTree, _} ->
                    NewTree
                end;
            true ->
                {NewTree, _} = couch_key_tree:merge(AccTree,
                    couch_db:doc_to_tree(NewDoc), Limit),
                NewTree
            end
        end,
        OldTree, NewDocs),
    if NewRevTree == OldTree ->
        % nothing changed
        merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
            AccNewInfos, AccRemoveSeqs, AccSeq);
    true ->
        % we have updated the document, give it a new seq #
        NewInfo = #full_doc_info{id=Id,update_seq=AccSeq+1,rev_tree=NewRevTree},
        RemoveSeqs = case OldSeq of
            0 -> AccRemoveSeqs;
            _ -> [OldSeq | AccRemoveSeqs]
        end,
        merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
            [NewInfo|AccNewInfos], RemoveSeqs, AccSeq+1)
    end.

new_index_entries([], Acc) ->
    Acc;
new_index_entries([Info|Rest], Acc) ->
    #doc_info{revs=[#rev_info{deleted=Del}|_]} = couch_doc:to_doc_info(Info),
    new_index_entries(Rest, [Info#full_doc_info{deleted=Del}|Acc]).

stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
    [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} ||
        #full_doc_info{rev_tree=Tree}=Info <- DocInfos].

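% update_docs_int/5 is the write path proper: merge the incoming edits into
% the existing rev trees, flush new document summaries to disk, recompute
% per-doc data sizes, then update the by-id and by-seq btrees in one pass
% and commit the header (possibly via a delayed commit).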
update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
    #db{
        id_tree = DocInfoByIdBTree,
        seq_tree = DocInfoBySeqBTree,
        update_seq = LastSeq,
        revs_limit = RevsLimit
    } = Db,
    Ids = [Id || [{_Client, #doc{id=Id}}|_] <- DocsList],
    % look up the old documents, if they exist.
    OldDocLookups = couch_btree:lookup(DocInfoByIdBTree, Ids),
    OldDocInfos = lists:zipwith(
        fun(_Id, {ok, FullDocInfo}) ->
            FullDocInfo;
        (Id, not_found) ->
            #full_doc_info{id=Id}
        end,
        Ids, OldDocLookups),
    % Merge the new docs into the revision trees.
    {ok, NewFullDocInfos, RemoveSeqs, NewSeq} = merge_rev_trees(RevsLimit,
            MergeConflicts, DocsList, OldDocInfos, [], [], LastSeq),
    % All documents are now ready to write.
    {ok, Db2} = update_local_docs(Db, NonRepDocs),
    % Write out the document summaries (the bodies are stored in the nodes of
    % the trees, the attachments are already written to disk)
    {ok, FlushedFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []),
    IndexInfos =
        new_index_entries(compute_data_sizes(FlushedFullDocInfos, []), []),
    % and the indexes
    {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree,
            IndexInfos, []),
    {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree,
            IndexInfos, RemoveSeqs),
    Db3 = Db2#db{
        id_tree = DocInfoByIdBTree2,
        seq_tree = DocInfoBySeqBTree2,
        update_seq = NewSeq},
    % Check if we just updated any design documents, and update the validation
    % funs if we did.
    case lists:any(
            fun(<<"_design/", _/binary>>) -> true; (_) -> false end, Ids) of
    false ->
        Db4 = Db3;
    true ->
        Db4 = refresh_validate_doc_funs(Db3)
    end,
    {ok, commit_data(Db4, not FullCommit)}.

compute_data_sizes([], Acc) ->
    lists:reverse(Acc);
compute_data_sizes([FullDocInfo | RestDocInfos], Acc) ->
    #full_doc_info{rev_tree=Tree} = FullDocInfo,
    Size = couch_key_tree:compute_data_size(Tree),
    compute_data_sizes(RestDocInfos,
        [FullDocInfo#full_doc_info{data_size=Size} | Acc]).

update_local_docs(Db, []) ->
    {ok, Db};
update_local_docs(#db{local_tree=Btree}=Db, Docs) ->
    Ids = [Id || {_Client, #doc{id=Id}} <- Docs],
    OldDocLookups = couch_btree:lookup(Btree, Ids),
    BtreeEntries = lists:zipwith(
        fun({Client, #doc{id=Id,deleted=Delete,revs={0,PrevRevs},body=Body}},
                _OldDocLookup) ->
            case PrevRevs of
            [RevStr|_] ->
                PrevRev = list_to_integer(?b2l(RevStr));
            [] ->
                PrevRev = 0
            end,
            %% disabled conflict checking for local docs -- APK 16 June 2010
            % OldRev =
            % case OldDocLookup of
            %     {ok, {_, {OldRev0, _}}} -> OldRev0;
            %     not_found -> 0
            % end,
            % case OldRev == PrevRev of
            % true ->
                case Delete of
                false ->
                    send_result(Client, Id, {0, PrevRevs}, {ok,
                        {0, ?l2b(integer_to_list(PrevRev + 1))}}),
                    {update, {Id, {PrevRev + 1, Body}}};
                true ->
                    send_result(Client, Id, {0, PrevRevs},
                        {ok, {0, <<"0">>}}),
                    {remove, Id}
                end%;
            % false ->
            %     send_result(Client, Id, {0, PrevRevs}, conflict),
            %     ignore
            % end
        end, Docs, OldDocLookups),
    BtreeIdsRemove = [Id || {remove, Id} <- BtreeEntries],
    BtreeIdsUpdate = [{Key, Val} || {update, {Key, Val}} <- BtreeEntries],
    {ok, Btree2} =
        couch_btree:add_remove(Btree, BtreeIdsUpdate, BtreeIdsRemove),
    {ok, Db#db{local_tree = Btree2}}.

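% commit_data/2 with Delayed=true schedules a delayed_commit message one
% second out instead of writing the header immediately. On a real commit
% the header is only rewritten if something changed, with optional fsyncs
% before and/or after the header write as configured by fsync_options.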
commit_data(Db) ->
    commit_data(Db, false).

db_to_header(Db, Header) ->
    Header#db_header{
        update_seq = Db#db.update_seq,
        seq_tree_state = couch_btree:get_state(Db#db.seq_tree),
        id_tree_state = couch_btree:get_state(Db#db.id_tree),
        local_tree_state = couch_btree:get_state(Db#db.local_tree),
        security_ptr = Db#db.security_ptr,
        revs_limit = Db#db.revs_limit}.

commit_data(#db{waiting_delayed_commit=nil} = Db, true) ->
    Db#db{waiting_delayed_commit=erlang:send_after(1000, self(), delayed_commit)};
commit_data(Db, true) ->
    Db;
commit_data(Db, _) ->
    #db{
        fd = Fd,
        filepath = Filepath,
        header = OldHeader,
        fsync_options = FsyncOptions,
        waiting_delayed_commit = Timer
    } = Db,
    if is_reference(Timer) -> erlang:cancel_timer(Timer); true -> ok end,
    case db_to_header(Db, OldHeader) of
    OldHeader ->
        Db#db{waiting_delayed_commit=nil};
    Header ->
        case lists:member(before_header, FsyncOptions) of
        true -> ok = couch_file:sync(Filepath);
        _ -> ok
        end,
        ok = couch_file:write_header(Fd, Header),
        case lists:member(after_header, FsyncOptions) of
        true -> ok = couch_file:sync(Filepath);
        _ -> ok
        end,
        Db#db{waiting_delayed_commit=nil,
            header=Header,
            committed_update_seq=Db#db.update_seq}
    end.

copy_doc_attachments(#db{fd=SrcFd}=SrcDb, {Pos,_RevId}, SrcSp, DestFd) ->
    {ok, {BodyData, BinInfos}} = couch_db:read_doc(SrcDb, SrcSp),
    % copy the bin values
    NewBinInfos = lists:map(
        fun({Name, {Type, BinSp, AttLen}}) when is_tuple(BinSp) orelse BinSp == null ->
            % 09 UPGRADE CODE
            {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} =
                couch_stream:old_copy_to_new_stream(SrcFd, BinSp, AttLen, DestFd),
            {Name, Type, NewBinSp, AttLen, AttLen, Pos, Md5, identity};
        ({Name, {Type, BinSp, AttLen}}) ->
            % 09 UPGRADE CODE
            {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} =
                couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd),
            {Name, Type, NewBinSp, AttLen, AttLen, Pos, Md5, identity};
        ({Name, Type, BinSp, AttLen, _RevPos, <<>>}) when
                is_tuple(BinSp) orelse BinSp == null ->
            % 09 UPGRADE CODE
            {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} =
                couch_stream:old_copy_to_new_stream(SrcFd, BinSp, AttLen, DestFd),
            {Name, Type, NewBinSp, AttLen, AttLen, AttLen, Md5, identity};
        ({Name, Type, BinSp, AttLen, RevPos, Md5}) ->
            % 010 UPGRADE CODE
            {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} =
                couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd),
            {Name, Type, NewBinSp, AttLen, AttLen, RevPos, Md5, identity};
        ({Name, Type, BinSp, AttLen, DiskLen, RevPos, Md5, Enc1}) ->
            {NewBinSp, AttLen, _, Md5, _IdentityMd5} =
                couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd),
            Enc = case Enc1 of
            true ->
                % 0110 UPGRADE CODE
                gzip;
            false ->
                % 0110 UPGRADE CODE
                identity;
            _ ->
                Enc1
            end,
            {Name, Type, NewBinSp, AttLen, DiskLen, RevPos, Md5, Enc}
        end, BinInfos),
    {BodyData, NewBinInfos}.

merge_lookups(Infos, []) ->
    Infos;
merge_lookups([], _) ->
    [];
merge_lookups([#doc_info{}|RestInfos], [{ok, FullDocInfo}|RestLookups]) ->
    [FullDocInfo|merge_lookups(RestInfos, RestLookups)];
merge_lookups([FullDocInfo|RestInfos], Lookups) ->
    [FullDocInfo|merge_lookups(RestInfos, Lookups)].

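% copy_docs/4 copies a batch of documents (bodies and attachment streams)
% into the compact file. Duplicate infos are pruned first (COUCHDB-968),
% and on a retried compaction any seqs already present in the new file are
% removed from its by_seq index before the fresh entries are added.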
copy_docs(Db, #db{fd=DestFd}=NewDb, MixedInfos, Retry) ->
    % lookup any necessary full_doc_infos
    DocInfoIds = [Id || #doc_info{id=Id} <- MixedInfos],
    LookupResults = couch_btree:lookup(Db#db.id_tree, DocInfoIds),
    % COUCHDB-968, make sure we prune duplicates during compaction
    Infos = lists:usort(fun(#full_doc_info{id=A}, #full_doc_info{id=B}) ->
        A =< B
    end, merge_lookups(MixedInfos, LookupResults)),
    NewInfos1 = [Info#full_doc_info{rev_tree=couch_key_tree:map(
        fun(Rev, #leaf{ptr=Sp, size=Size0}=Leaf, leaf) ->
            {Body, AttInfos} = copy_doc_attachments(Db, Rev, Sp, DestFd),
            {ok, Pos} = couch_file:append_term_md5(DestFd, {Body, AttInfos}),
            if Size0 > 0 ->
                Leaf#leaf{ptr=Pos};
            true ->
                DocSize = byte_size(term_to_binary(Body)),
                AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
                Leaf#leaf{ptr=Pos, size=DocSize, atts=AttSizes}
            end;
        (_, _, branch) ->
            ?REV_MISSING
        end, RevTree)} || #full_doc_info{rev_tree=RevTree}=Info <- Infos],
    NewInfos = stem_full_doc_infos(Db, compute_data_sizes(NewInfos1, [])),
    RemoveSeqs =
    case Retry of
    false ->
        [];
    true ->
        % We are retrying a compaction, meaning the documents we are copying
        % may already exist in our file and must be removed from the by_seq
        % index.
        Ids = [Id || #full_doc_info{id=Id} <- Infos],
        Existing = couch_btree:lookup(NewDb#db.id_tree, Ids),
        [Seq || {ok, #full_doc_info{update_seq=Seq}} <- Existing]
    end,
    {ok, SeqTree} = couch_btree:add_remove(
        NewDb#db.seq_tree, NewInfos, RemoveSeqs),
    {ok, IdTree} = couch_btree:add_remove(
        NewDb#db.id_tree, NewInfos, []),
    update_compact_task(length(NewInfos)),
    NewDb#db{id_tree=IdTree, seq_tree=SeqTree}.

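% copy_compact/3 folds over the by_seq tree starting just past the compact
% file's update seq, buffering changes and flushing them via copy_docs/4 in
% batches (roughly every 1000 changes, with a header commit roughly every
% 10000). Progress is reported through couch_task_status.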
copy_compact(Db, NewDb0, Retry) ->
    FsyncOptions = [Op || Op <- NewDb0#db.fsync_options, Op == before_header],
    NewDb = NewDb0#db{fsync_options=FsyncOptions},
    TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq),
    EnumBySeqFun =
    fun(DocInfo, _Offset, {AccNewDb, AccUncopied, TotalCopied}) ->
        case DocInfo of
        #full_doc_info{update_seq=Seq} ->
            ok;
        #doc_info{high_seq=Seq} ->
            ok
        end,
        couch_task_status:update("Copied ~p of ~p changes (~p%)",
            [TotalCopied, TotalChanges, (TotalCopied*100) div TotalChanges]),
        if TotalCopied rem 1000 =:= 0 ->
            NewDb2 = copy_docs(Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
            if TotalCopied rem 10000 =:= 0 ->
                NewDb3 = commit_data(NewDb2#db{update_seq=Seq}),
                {ok, {NewDb3, [], TotalCopied + 1}};
            true ->
                {ok, {NewDb2#db{update_seq=Seq}, [], TotalCopied + 1}}
            end;
        true ->
            {ok, {AccNewDb, [DocInfo | AccUncopied], TotalCopied + 1}}
        end
    end,
    TaskProps0 = [
        {type, database_compaction},
        {database, Db#db.name},
        {progress, 0},
        {changes_done, 0},
        {total_changes, TotalChanges}
    ],
    case Retry and couch_task_status:is_task_added() of
    true ->
        couch_task_status:update([
            {retry, true},
            {progress, 0},
            {changes_done, 0},
            {total_changes, TotalChanges}
        ]);
    false ->
        couch_task_status:add_task(TaskProps0),
        couch_task_status:set_update_frequency(500)
    end,
    {ok, _, {NewDb2, Uncopied, TotalChanges}} =
        couch_btree:foldl(Db#db.seq_tree, EnumBySeqFun,
            {NewDb, [], 0},
            [{start_key, NewDb#db.update_seq + 1}]),
    NewDb3 = copy_docs(Db, NewDb2, lists:reverse(Uncopied), Retry),
    % copy misc header values
    if NewDb3#db.security /= Db#db.security ->
        {ok, Ptr} = couch_file:append_term(NewDb3#db.fd, Db#db.security),
        NewDb4 = NewDb3#db{security=Db#db.security, security_ptr=Ptr};
    true ->
        NewDb4 = NewDb3
    end,
    commit_data(NewDb4#db{update_seq=Db#db.update_seq}).

start_copy_compact(#db{name=Name,filepath=Filepath,header=#db_header{purge_seq=PurgeSeq}}=Db) ->
    CompactFile = Filepath ++ ".compact",
    ?LOG_DEBUG("Compaction process spawned for db \"~s\"", [Name]),
    case couch_file:open(CompactFile) of
    {ok, Fd} ->
        Retry = true,
        case couch_file:read_header(Fd) of
        {ok, Header} ->
            ok;
        no_valid_header ->
            ok = couch_file:write_header(Fd, Header=#db_header{})
        end;
    {error, enoent} ->
        {ok, Fd} = couch_file:open(CompactFile, [create]),
        Retry = false,
        ok = couch_file:write_header(Fd, Header=#db_header{})
    end,
    NewDb = init_db(Name, CompactFile, Fd, Header),
    NewDb2 = if PurgeSeq > 0 ->
        {ok, PurgedIdsRevs} = couch_db:get_last_purged(Db),
        {ok, Pointer} = couch_file:append_term(Fd, PurgedIdsRevs),
        NewDb#db{header=Header#db_header{purge_seq=PurgeSeq, purged_docs=Pointer}};
    true ->
        NewDb
    end,
    unlink(Fd),
    NewDb3 = copy_compact(Db, NewDb2, Retry),
    close_db(NewDb3),
    gen_server:cast(Db#db.main_pid, {compact_done, CompactFile}).

update_compact_task(NumChanges) ->
    [Changes, Total] = couch_task_status:get([changes_done, total_changes]),
    Changes2 = Changes + NumChanges,
    Progress = case Total of
    0 ->
        0;
    _ ->
        (Changes2 * 100) div Total
    end,
    couch_task_status:update([{changes_done, Changes2}, {progress, Progress}]).