/lib/src/elib1_fast_read.erl

https://github.com/midnightskinhead/elib1 · Erlang · 123 lines · 70 code · 23 blank · 30 comment · 0 complexity · 71e1dd731fb8281f1cba97dde2af0c64 MD5 · raw file

  1. %% Copyright (c) 2006-2009 Joe Armstrong
  2. %% See MIT-LICENSE for licensing information.
  3. -module(elib1_fast_read).
  4. -compile(export_all).
  5. -export([foldl/3, open_abs/1, close_abs/1, read_abs/2]).
  6. %% foldl(File, Fun, Acc) -> Acc1
  7. %% calls Fun(Tup, Pos, Acc) -> Acc1
  8. %% for each tuple in <File>
  9. %% File is a sequence of terms, each stored as one record of the form
  10. %% <<Len:32/integer, Payload/binary>> where Payload = term_to_binary(Tup)
  11. %% Pos is the *absolute* byte offset in the file
  12. %% of that record's 4 byte length header
  13. %% open_abs(File) -> Stream
  14. %% close_abs(Stream) -> void
  15. %% read_abs(Stream, Pos) -> Term
  16. %%----------------------------------------------------------------------
  %% Types shared by the specs in this module.
  -type acc() :: any().
  -type filename() :: string().
  -type stream() :: file:io_device().

  %% @doc Fold a fun over a file of terms.
  %% The terms are stored in binary term format and are visited one at a
  %% time, in file order. Fun is of the form
  %% <b>F(Term, Pos, Acc) -> NewAcc</b>, where Pos is the integer offset of
  %% the term's record in the file; it can be used later as an argument
  %% to <b>read_abs</b>.
  %%
  %% Returns the final accumulator. If Fun throws a value, the fold stops
  %% early and the thrown value is returned instead. The file is closed on
  %% every exit path, including when Fun raises an error or an exit.
  -spec foldl(filename(),
              fun((any(), Pos::integer(), acc()) -> acc()),
              acc()) -> acc().
  foldl(File, Fun, Acc0) ->
      {ok, Stream} = file:open(File, [read, raw, binary, {read_ahead, 64000}]),
      try
          read2a(Stream, Fun, Acc0, 0, <<>>)
      catch
          %% A throw is treated as an early return carrying the result.
          throw:Result -> Result
      after
          %% Runs for normal returns, throws, errors and exits alike, so
          %% the file descriptor is never leaked.
          file:close(Stream)
      end.
  %% Round-trip unit test shared by elib1_fast_read and elib1_fast_write:
  %% writes three terms, folds over the file collecting {Pos, Term} pairs,
  %% then re-reads each term through its absolute position.
  foldl_test() ->
      Path = "./tmp.tmp",
      Terms = [{term,1}, "it works", {term,2,more,stuff}],
      Writer = lists:foldl(fun(Term, W) -> elib1_fast_write:write(W, Term) end,
                           elib1_fast_write:new(Path),
                           Terms),
      elib1_fast_write:close(Writer),
      %% The fold fun prepends, so pairs come back in reverse write order.
      Pairs = foldl(Path, fun(Term, Pos, Acc) -> [{Pos, Term} | Acc] end, []),
      [{PosC, {term,2,more,stuff}}, {PosB, "it works"}, {PosA, {term,1}}] = Pairs,
      %% Random access by position must yield the same terms.
      Reader = open_abs(Path),
      "it works" = read_abs(Reader, PosB),
      {term,1} = read_abs(Reader, PosA),
      {term,2,more,stuff} = read_abs(Reader, PosC),
      close_abs(Reader),
      ok.
  58. %%----------------------------------------------------------------------
  %% @doc Open File for reading and return the stream.
  %% The file is opened in raw binary mode with a 64000 byte read-ahead
  %% buffer. Crashes with a badmatch if the file cannot be opened.
  -spec open_abs(filename()) -> stream().
  open_abs(File) ->
      Modes = [read, raw, binary, {read_ahead, 64000}],
      {ok, Stream} = file:open(File, Modes),
      Stream.
  64. %%----------------------------------------------------------------------
  %% @doc Close a stream.
  %% Returns whatever file:close/1 returns: ok on success, or
  %% {error, Reason} if the stream could not be closed.
  -spec close_abs(stream()) -> ok | {error, file:posix() | badarg | terminated}.
  close_abs(Stream) ->
      file:close(Stream).
  69. %%----------------------------------------------------------------------
  %% @doc Read a term given the absolute position from a stream.
  %% Pos must be the offset of a record's 4 byte big-endian length header;
  %% the payload that follows it is decoded with binary_to_term/1.
  %% Crashes with a badmatch if either positional read does not return
  %% {ok, Data}.
  -spec read_abs(stream(), Pos::integer()) -> any().
  read_abs(Stream, Pos) ->
      HeaderBytes = 4,
      {ok, <<Len:32/big>>} = file:pread(Stream, Pos, HeaderBytes),
      {ok, Payload} = file:pread(Stream, Pos + HeaderBytes, Len),
      binary_to_term(Payload).
  %% read2a(Stream, Fun, Acc, Pos, Buffer) -> Acc1
  %% Worker loop behind foldl/3. Buffer holds the bytes read from Stream
  %% that have not been consumed yet, and Pos is the absolute file offset
  %% of the first byte in Buffer.
  %%
  %% First clause: Buffer starts with a complete record (4 byte big-endian
  %% length followed by that many payload bytes). The payload is decoded,
  %% handed to Fun(Term, Pos, Acc), and the loop continues after the record.
  %% NOTE(review): binary_to_term/1 is called without the [safe] option, so
  %% the file is presumably trusted input -- confirm.
  read2a(Stream, Fun, Acc, Pos,
         <<Size:32/big, Payload:Size/binary, Rest/binary>>) ->
      Term = binary_to_term(Payload),
      %% This is where we call the user's function
      Acc1 = Fun(Term, Pos, Acc),
      read2a(Stream, Fun, Acc1, Pos + Size + 4, Rest);
  %% Second clause: Buffer holds less than one complete record (possibly
  %% nothing), so another chunk is appended and matching is retried. At
  %% end of file the accumulator is returned and any incomplete trailing
  %% bytes are ignored.
  read2a(Stream, Fun, Acc, Pos, Pending) ->
      case file:read(Stream, 64000) of
          {ok, Chunk} ->
              read2a(Stream, Fun, Acc, Pos, <<Pending/binary, Chunk/binary>>);
          eof ->
              Acc
      end.