PageRenderTime 56ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/Directory-Queue-1.6/lib/Directory/Queue/Normal.pm

#
Perl | 978 lines | 628 code | 138 blank | 212 comment | 84 complexity | a0c62ea58cbf24e313a0ad4b32235a5c MD5 | raw file
  1. #+##############################################################################
  2. # #
  3. # File: Directory/Queue/Normal.pm #
  4. # #
  5. # Description: object oriented interface to a normal directory based queue #
  6. # #
  7. #-##############################################################################
  8. #
  9. # module definition
  10. #
  11. package Directory::Queue::Normal;
  12. use strict;
  13. use warnings;
  14. our $VERSION = "1.6";
  15. our $REVISION = sprintf("%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);
  16. #
  17. # used modules
  18. #
  19. use Directory::Queue qw(:DIR :FILE :RE :ST _fatal _name);
  20. use POSIX qw(:errno_h);
  21. #
  22. # inheritance
  23. #
  24. our(@ISA) = qw(Directory::Queue);
  25. #
  26. # constants
  27. #
  28. # name of the directory holding temporary elements
  29. use constant TEMPORARY_DIRECTORY => "temporary";
  30. # name of the directory holding obsolete elements
  31. use constant OBSOLETE_DIRECTORY => "obsolete";
  32. # name of the directory indicating a locked element
  33. use constant LOCKED_DIRECTORY => "locked";
  34. #
  35. # global variables
  36. #
  37. our(
  38. $_FileRegexp, # regexp matching a file in an element directory
  39. %_Byte2Esc, # byte to escape map
  40. %_Esc2Byte, # escape to byte map
  41. );
  42. $_FileRegexp = qr/[0-9a-zA-Z]+/;
  43. %_Byte2Esc = ("\x5c" => "\\\\", "\x09" => "\\t", "\x0a" => "\\n");
  44. %_Esc2Byte = reverse(%_Byte2Esc);
  45. #+++############################################################################
  46. # #
  47. # Helper Functions #
  48. # #
  49. #---############################################################################
  50. #
  51. # transform a hash of strings into a string (reference)
  52. #
  53. # note:
  54. # - the keys are sorted so that identical hashes yield to identical strings
  55. #
  56. sub _hash2string ($) {
  57. my($hash) = @_;
  58. my($key, $value, $string);
  59. $string = "";
  60. foreach $key (sort(keys(%$hash))) {
  61. $value = $hash->{$key};
  62. _fatal("undefined hash value: %s", $key) unless defined($value);
  63. _fatal("invalid hash scalar: %s", $value) if ref($value);
  64. $key =~ s/([\x5c\x09\x0a])/$_Byte2Esc{$1}/g;
  65. $value =~ s/([\x5c\x09\x0a])/$_Byte2Esc{$1}/g;
  66. $string .= $key . "\x09" . $value . "\x0a";
  67. }
  68. return(\$string);
  69. }
  70. #
  71. # transform a string (reference) into a hash of strings
  72. #
  73. # note:
  74. # - duplicate keys are not checked (the last one wins)
  75. #
  76. sub _string2hash ($) {
  77. my($stringref) = @_;
  78. my($line, $key, $value, %hash);
  79. foreach $line (split(/\x0a/, $$stringref)) {
  80. _fatal("unexpected hash line: %s", $line)
  81. unless $line =~ /^([^\x09\x0a]*)\x09([^\x09\x0a]*)$/o;
  82. ($key, $value) = ($1, $2);
  83. $key =~ s/(\\[\\tn])/$_Esc2Byte{$1}/g;
  84. $value =~ s/(\\[\\tn])/$_Esc2Byte{$1}/g;
  85. $hash{$key} = $value;
  86. }
  87. return(\%hash);
  88. }
  89. #
  90. # check if a path is old enough:
  91. # - return true if the path exists and is (strictly) older than the given time
  92. # - return false if it does not exist or it is newer
  93. # - die in case of any other error
  94. #
  95. # note:
  96. # - lstat() is used so symlinks are not followed
  97. #
  98. sub _older ($$) {
  99. my($path, $time) = @_;
  100. my(@stat);
  101. @stat = lstat($path);
  102. unless (@stat) {
  103. _fatal("cannot lstat(%s): %s", $path, $!) unless $! == ENOENT;
  104. # RACE: this path does not exist (anymore)
  105. return(0);
  106. }
  107. return($stat[ST_MTIME] < $time);
  108. }
  109. #
  110. # count the number of sub-directories in the given directory:
  111. # - return undef if the directory does not exist (anymore)
  112. # - die in case of any other error
  113. #
  114. # stat version (faster):
  115. # - lstat() is used so symlinks are not followed
  116. # - this only checks the number of hard links
  117. # - we do not even check that the given path indeed points to a directory!
  118. sub _subdirs_stat ($) {
  119. my($path) = @_;
  120. my(@stat);
  121. @stat = lstat($path);
  122. unless (@stat) {
  123. _fatal("cannot lstat(%s): %s", $path, $!) unless $! == ENOENT;
  124. # RACE: this path does not exist (anymore)
  125. return();
  126. }
  127. return($stat[ST_NLINK] - 2);
  128. }
  129. # readdir version (slower):
  130. # - we really count the number of entries
  131. # - we however do not check that these entries are themselves indeed directories
  132. sub _subdirs_readdir ($) {
  133. my($path) = @_;
  134. return(scalar(_special_getdir($path)));
  135. }
  136. # use the right version (we cannot rely on hard links on DOS-like systems)
  137. if ($^O =~ /^(cygwin|dos|MSWin32)$/) {
  138. *_subdirs = \&_subdirs_readdir;
  139. } else {
  140. *_subdirs = \&_subdirs_stat;
  141. }
  142. #
  143. # check the given string to make sure it represents a valid element name
  144. #
  145. sub _check_element ($) {
  146. my($element) = @_;
  147. _fatal("invalid element: %s", $element)
  148. unless $element =~ m/^(?:$_DirectoryRegexp)\/(?:$_ElementRegexp)$/o;
  149. }
  150. #+++############################################################################
  151. # #
  152. # Object Oriented Interface #
  153. # #
  154. #---############################################################################
  155. #
  156. # object constructor
  157. #
  158. sub new : method {
  159. my($class, %option) = @_;
  160. my($self, $name, $path, $options);
  161. # default object
  162. $self = __PACKAGE__->SUPER::_new(%option);
  163. foreach $name (qw(path umask)) {
  164. delete($option{$name});
  165. }
  166. # default options
  167. $self->{maxelts} = 16_000; # maximum number of elements allowed per directory
  168. # check maxelts
  169. if (defined($option{maxelts})) {
  170. _fatal("invalid maxelts: %s", $option{maxelts})
  171. unless $option{maxelts} =~ /^\d+$/ and $option{maxelts} > 0;
  172. $self->{maxelts} = delete($option{maxelts});
  173. }
  174. # check schema
  175. if (defined($option{schema})) {
  176. _fatal("invalid schema: %s", $option{schema})
  177. unless ref($option{schema}) eq "HASH";
  178. foreach $name (keys(%{ $option{schema} })) {
  179. _fatal("invalid schema name: %s", $name)
  180. unless $name =~ /^($_FileRegexp)$/ and $name ne LOCKED_DIRECTORY;
  181. _fatal("invalid schema type: %s", $option{schema}{$name})
  182. unless $option{schema}{$name} =~ /^(binary|string|table)([\?\*]{0,2})$/;
  183. $self->{type}{$name} = $1;
  184. $options = $2;
  185. $self->{mandatory}{$name} = 1 unless $options =~ /\?/;
  186. $self->{ref}{$name} = 1 if $options =~ /\*/;
  187. _fatal("invalid schema type: %s", $option{schema}{$name})
  188. if $self->{type}{$name} eq "table" and $self->{ref}{$name};
  189. }
  190. _fatal("invalid schema: no mandatory data")
  191. unless $self->{mandatory};
  192. delete($option{schema});
  193. }
  194. # check unexpected options
  195. foreach $name (keys(%option)) {
  196. _fatal("unexpected option: %s", $name);
  197. }
  198. # create directories
  199. foreach $name (TEMPORARY_DIRECTORY, OBSOLETE_DIRECTORY) {
  200. $path = $self->{path} . "/" . $name;
  201. _special_mkdir($path, $self->{umask}) unless -d $path;
  202. }
  203. # so far so good...
  204. return($self);
  205. }
  206. #
  207. # return the number of elements in the queue, regardless of their state
  208. #
  209. sub count : method {
  210. my($self) = @_;
  211. my($count, $name, @list, $subdirs);
  212. $count = 0;
  213. # get the list of existing directories
  214. foreach $name (_special_getdir($self->{path}, "strict")) {
  215. push(@list, $1) if $name =~ /^($_DirectoryRegexp)$/o; # untaint
  216. }
  217. # count sub-directories
  218. foreach $name (@list) {
  219. $subdirs = _subdirs($self->{path} . "/" . $name);
  220. $count += $subdirs if $subdirs;
  221. }
  222. # that's all
  223. return($count);
  224. }
  225. #
  226. # check if an element is locked:
  227. # - this is best effort only as it may change while we test (only locking is atomic)
  228. # - if given a time, only return true on locks older than this time (needed by purge)
  229. #
  230. # version using nlink (faster)
  231. sub _is_locked_nlink : method {
  232. my($self, $name, $time) = @_;
  233. my($path, @stat);
  234. $path = $self->{path} . "/" . $name;
  235. @stat = lstat($path);
  236. unless (@stat) {
  237. _fatal("cannot lstat(%s): %s", $path, $!) unless $! == ENOENT;
  238. # RACE: this path does not exist (anymore)
  239. return(0);
  240. }
  241. # locking increases nlink so...
  242. return(0) unless $stat[ST_NLINK] > 2;
  243. # check age if time is given
  244. return(0) if defined($time) and $stat[ST_MTIME] >= $time;
  245. # so far so good but we double check that the proper directory does exist
  246. return(-d $path . "/" . LOCKED_DIRECTORY);
  247. }
  248. # version not using nlink (slower)
  249. sub _is_locked_nonlink : method {
  250. my($self, $name, $time) = @_;
  251. my($path, @stat);
  252. $path = $self->{path} . "/" . $name;
  253. return(0) unless -d $path . "/" . LOCKED_DIRECTORY;
  254. return(1) unless defined($time);
  255. @stat = lstat($path);
  256. unless (@stat) {
  257. _fatal("cannot lstat(%s): %s", $path, $!) unless $! == ENOENT;
  258. # RACE: this path does not exist (anymore)
  259. return(0);
  260. }
  261. return($stat[ST_MTIME] < $time);
  262. }
  263. # use the right version (we cannot rely on hard links on DOS-like systems)
  264. if ($^O =~ /^(cygwin|dos|MSWin32)$/) {
  265. *_is_locked = \&_is_locked_nonlink;
  266. } else {
  267. *_is_locked = \&_is_locked_nlink;
  268. }
  269. #
  270. # lock an element:
  271. # - return true on success
  272. # - return false in case the element could not be locked (in permissive mode)
  273. #
  274. # note:
  275. # - locking can fail:
  276. # - if the element has been locked by somebody else (EEXIST)
  277. # - if the element has been removed by somebody else (ENOENT)
  278. # - if the optional second argument is true, it is not an error if
  279. # the element cannot be locked (= permissive mode), this is the default
  280. # as one usually cannot be sure that nobody else will try to lock it
  281. # - the directory's mtime will change automatically (after a successful mkdir()),
  282. # this will later be used to detect stalled locks
  283. #
  284. sub lock : method {
  285. my($self, $element, $permissive) = @_;
  286. my($path, $oldumask, $success);
  287. _check_element($element);
  288. $permissive = 1 unless defined($permissive);
  289. $path = $self->{path} . "/" . $element . "/" . LOCKED_DIRECTORY;
  290. if (defined($self->{umask})) {
  291. $oldumask = umask($self->{umask});
  292. $success = mkdir($path);
  293. umask($oldumask);
  294. } else {
  295. $success = mkdir($path);
  296. }
  297. unless ($success) {
  298. if ($permissive) {
  299. # RACE: the locked directory already exists
  300. return(0) if $! == EEXIST;
  301. # RACE: the element directory does not exist anymore
  302. return(0) if $! == ENOENT;
  303. }
  304. # otherwise this is unexpected...
  305. _fatal("cannot mkdir(%s): %s", $path, $!);
  306. }
  307. $path = $self->{path} . "/" . $element;
  308. unless (lstat($path)) {
  309. if ($permissive and $! == ENOENT) {
  310. # RACE: the element directory does not exist anymore
  311. # (this can happen if an other process locked & removed the element
  312. # while our mkdir() was in progress... yes, this can happen!)
  313. return(0);
  314. }
  315. # otherwise this is unexpected...
  316. _fatal("cannot lstat(%s): %s", $path, $!);
  317. }
  318. # so far so good
  319. return(1);
  320. }
  321. #
  322. # unlock an element:
  323. # - return true on success
  324. # - return false in case the element could not be unlocked (in permissive mode)
  325. #
  326. # note:
  327. # - unlocking can fail:
  328. # - if the element has been unlocked by somebody else (ENOENT)
  329. # - if the element has been removed by somebody else (ENOENT)
  330. # - if the optional second argument is true, it is not an error if
  331. # the element cannot be unlocked (= permissive mode), this is _not_ the default
  332. # as unlock() should normally be called by whoever locked the element
  333. #
  334. sub unlock : method {
  335. my($self, $element, $permissive) = @_;
  336. my($path);
  337. _check_element($element);
  338. $path = $self->{path} . "/" . $element . "/" . LOCKED_DIRECTORY;
  339. unless (rmdir($path)) {
  340. if ($permissive) {
  341. # RACE: the element directory or its lock does not exist anymore
  342. return(0) if $! == ENOENT;
  343. }
  344. # otherwise this is unexpected...
  345. _fatal("cannot rmdir(%s): %s", $path, $!);
  346. }
  347. # so far so good
  348. return(1);
  349. }
  350. #
  351. # remove a locked element from the queue
  352. #
  353. sub remove : method {
  354. my($self, $element) = @_;
  355. my($temp, $name, $path);
  356. _check_element($element);
  357. _fatal("cannot remove %s: not locked", $element) unless $self->_is_locked($element);
  358. # move the element out of its intermediate directory
  359. $path = $self->{path} . "/" . $element;
  360. while (1) {
  361. $temp = $self->{path} . "/" . OBSOLETE_DIRECTORY . "/" . _name($self->{rndhex});
  362. rename($path, $temp) and last;
  363. _fatal("cannot rename(%s, %s): %s", $path, $temp, $!)
  364. unless $! == ENOTEMPTY or $! == EEXIST;
  365. # RACE: the target directory was already present...
  366. }
  367. # remove the data files
  368. foreach $name (_special_getdir($temp, "strict")) {
  369. next if $name eq LOCKED_DIRECTORY;
  370. _fatal("unexpected file in %s: %s", $temp, $name)
  371. unless $name =~ /^($_FileRegexp)$/o;
  372. $path = $temp . "/" . $1; # untaint
  373. unlink($path) and next;
  374. _fatal("cannot unlink(%s): %s", $path, $!);
  375. }
  376. # remove the locked directory
  377. $path = $temp . "/" . LOCKED_DIRECTORY;
  378. while (1) {
  379. rmdir($path) or _fatal("cannot rmdir(%s): %s", $path, $!);
  380. rmdir($temp) and return;
  381. _fatal("cannot rmdir(%s): %s", $temp, $!)
  382. unless $! == ENOTEMPTY or $! == EEXIST;
  383. # RACE: this can happen if an other process managed to lock this element
  384. # while it was being removed (see the comment in the lock() method)
  385. # so we try to remove the lock again and again...
  386. }
  387. }
  388. #
  389. # get an element from a locked element
  390. #
  391. sub get : method {
  392. my($self, $element) = @_;
  393. my(%data, $name, $path, $ref);
  394. _fatal("unknown schema") unless $self->{type};
  395. _check_element($element);
  396. _fatal("cannot get %s: not locked", $element) unless $self->_is_locked($element);
  397. foreach $name (keys(%{ $self->{type} })) {
  398. $path = "$self->{path}/$element/$name";
  399. unless (lstat($path)) {
  400. _fatal("cannot lstat(%s): %s", $path, $!) unless $! == ENOENT;
  401. if ($self->{mandatory}{$name}) {
  402. _fatal("missing data file: %s", $path);
  403. } else {
  404. next;
  405. }
  406. }
  407. if ($self->{type}{$name} =~ /^(binary|string)$/) {
  408. if ($self->{type}{$name} eq "string") {
  409. $ref = _file_read_utf8($path);
  410. } else {
  411. $ref = _file_read_bin($path);
  412. }
  413. $data{$name} = $self->{ref}{$name} ? $ref : $$ref;
  414. } elsif ($self->{type}{$name} eq "table") {
  415. $data{$name} = _string2hash(_file_read_utf8($path));
  416. } else {
  417. _fatal("unexpected data type: %s", $self->{type}{$name});
  418. }
  419. }
  420. return(\%data) unless wantarray();
  421. return(%data);
  422. }
  423. #
  424. # return the name of the intermediate directory that can be used for insertion:
  425. # - if there is none, an initial one will be created
  426. # - if it is full, a new one will be created
  427. # - in any case the name will match $_DirectoryRegexp
  428. #
  429. sub _insertion_directory : method {
  430. my($self) = @_;
  431. my(@list, $name, $subdirs);
  432. # get the list of existing directories
  433. foreach $name (_special_getdir($self->{path}, "strict")) {
  434. push(@list, $1) if $name =~ /^($_DirectoryRegexp)$/o; # untaint
  435. }
  436. # handle the case with no directories yet
  437. unless (@list) {
  438. $name = sprintf("%08x", 0);
  439. _special_mkdir($self->{path} . "/" . $name, $self->{umask});
  440. return($name);
  441. }
  442. # check the last directory
  443. @list = sort(@list);
  444. $name = pop(@list);
  445. $subdirs = _subdirs($self->{path} . "/" . $name);
  446. if (defined($subdirs)) {
  447. return($name) if $subdirs < $self->{maxelts};
  448. # this last directory is now full... create a new one
  449. } else {
  450. # RACE: at this point, the directory does not exist anymore, so it
  451. # must have been purged after we listed the directory contents...
  452. # we do not try to do more and simply create a new directory
  453. }
  454. # we need a new directory
  455. $name = sprintf("%08x", hex($name) + 1);
  456. _special_mkdir($self->{path} . "/" . $name, $self->{umask});
  457. return($name);
  458. }
  459. #
  460. # add a new element to the queue and return its name
  461. #
  462. # note:
  463. # - the destination directory must _not_ be created beforehand as it would
  464. # be seen as a valid (but empty) element directory by an other process,
  465. # we therefor use rename() from a temporary directory
  466. # - syswrite() used in _file_write() may die with a "Wide character"
  467. # "severe warning", we trap it here to provide better information
  468. #
  469. sub add : method {
  470. my($self, @data) = @_;
  471. my($data, $temp, $dir, $name, $path, $ref, $utf8);
  472. _fatal("unknown schema") unless $self->{type};
  473. if (@data == 1) {
  474. $data = $data[0];
  475. } else {
  476. $data = { @data };
  477. }
  478. while (1) {
  479. $temp = $self->{path} . "/" . TEMPORARY_DIRECTORY . "/" . _name($self->{rndhex});
  480. last if _special_mkdir($temp, $self->{umask});
  481. }
  482. foreach $name (keys(%$data)) {
  483. _fatal("unexpected data: %s", $name) unless $self->{type}{$name};
  484. if ($self->{type}{$name} =~ /^(binary|string)$/) {
  485. if ($self->{ref}{$name}) {
  486. _fatal("unexpected %s data in %s: %s",
  487. $self->{type}{$name}, $name, $data->{$name})
  488. unless ref($data->{$name}) eq "SCALAR";
  489. $ref = $data->{$name};
  490. } else {
  491. _fatal("unexpected %s data in %s: %s",
  492. $self->{type}{$name}, $name, $data->{$name})
  493. if ref($data->{$name});
  494. $ref = \$data->{$name};
  495. }
  496. $utf8 = $self->{type}{$name} eq "string";
  497. } elsif ($self->{type}{$name} eq "table") {
  498. _fatal("unexpected %s data in %s: %s", $self->{type}{$name}, $name, $data->{$name})
  499. unless ref($data->{$name}) eq "HASH";
  500. $ref = _hash2string($data->{$name});
  501. $utf8 = 1;
  502. } else {
  503. _fatal("unexpected data type in %s: %s", $name, $self->{type}{$name});
  504. }
  505. eval {
  506. _file_write("$temp/$name", $utf8, $self->{umask}, $ref);
  507. };
  508. if ($@) {
  509. if ($@ =~ /^Wide character in /) {
  510. _fatal("unexpected wide character in %s: %s", $name, $data->{$name});
  511. } else {
  512. die($@);
  513. }
  514. }
  515. }
  516. foreach $name (keys(%{ $self->{mandatory} })) {
  517. _fatal("missing mandatory data: %s", $name)
  518. unless defined($data->{$name});
  519. }
  520. $dir = $self->_insertion_directory();
  521. while (1) {
  522. $name = $dir . "/" . _name($self->{rndhex});
  523. $path = $self->{path} . "/" . $name;
  524. rename($temp, $path) and return($name);
  525. _fatal("cannot rename(%s, %s): %s", $temp, $path, $!)
  526. unless $! == ENOTEMPTY or $! == EEXIST;
  527. # RACE: the target directory was already present...
  528. }
  529. }
  530. #
  531. # return the list of volatile (i.e. temporary or obsolete) directories
  532. #
  533. sub _volatile : method {
  534. my($self) = @_;
  535. my(@list, $name);
  536. foreach $name (_special_getdir($self->{path} . "/" . TEMPORARY_DIRECTORY)) {
  537. push(@list, TEMPORARY_DIRECTORY . "/" . $1)
  538. if $name =~ /^($_ElementRegexp)$/o; # untaint
  539. }
  540. foreach $name (_special_getdir($self->{path} . "/" . OBSOLETE_DIRECTORY)) {
  541. push(@list, OBSOLETE_DIRECTORY . "/" . $1)
  542. if $name =~ /^($_ElementRegexp)$/o; # untaint
  543. }
  544. return(@list);
  545. }
  546. #
  547. # purge the queue:
  548. # - delete unused intermediate directories
  549. # - delete too old temporary directories
  550. # - unlock too old locked directories
  551. #
  552. # note: this uses first()/next() to iterate so this will reset the cursor
  553. #
  554. sub purge : method {
  555. my($self, %option) = @_;
  556. my(@list, $name, $path, $subdirs, $oldtime, $file, $fpath);
  557. # check options
  558. $option{maxtemp} = 300 unless defined($option{maxtemp});
  559. $option{maxlock} = 600 unless defined($option{maxlock});
  560. foreach $name (keys(%option)) {
  561. _fatal("unexpected option: %s", $name)
  562. unless $name =~ /^(maxtemp|maxlock)$/;
  563. _fatal("invalid %s: %s", $name, $option{$name})
  564. unless $option{$name} =~ /^\d+$/;
  565. }
  566. # get the list of intermediate directories
  567. @list = ();
  568. foreach $name (_special_getdir($self->{path}, "strict")) {
  569. push(@list, $1) if $name =~ /^($_DirectoryRegexp)$/o; # untaint
  570. }
  571. @list = sort(@list);
  572. # try to purge all but last one
  573. if (@list > 1) {
  574. pop(@list);
  575. foreach $name (@list) {
  576. $path = $self->{path} . "/" . $name;
  577. $subdirs = _subdirs($path);
  578. next if $subdirs or not defined($subdirs);
  579. _special_rmdir($path);
  580. }
  581. }
  582. # remove the volatile directories which are too old
  583. if ($option{maxtemp}) {
  584. $oldtime = time() - $option{maxtemp};
  585. foreach $name ($self->_volatile()) {
  586. $path = $self->{path} . "/" . $name;
  587. next unless _older($path, $oldtime);
  588. warn("* removing too old volatile element: $name\n");
  589. foreach $file (_special_getdir($path)) {
  590. next if $file eq LOCKED_DIRECTORY;
  591. $fpath = "$path/$file";
  592. unlink($fpath) and next;
  593. _fatal("cannot unlink(%s): %s", $fpath, $!) unless $! == ENOENT;
  594. }
  595. _special_rmdir($path . "/" . LOCKED_DIRECTORY);
  596. _special_rmdir($path);
  597. }
  598. }
  599. # iterate to find abandoned locked entries
  600. if ($option{maxlock}) {
  601. $oldtime = time() - $option{maxlock};
  602. $name = $self->first();
  603. while ($name) {
  604. next unless $self->_is_locked($name, $oldtime);
  605. warn("* removing too old locked element: $name\n");
  606. $self->unlock($name, 1);
  607. } continue {
  608. $name = $self->next();
  609. }
  610. }
  611. }
  612. 1;
  613. __END__
  614. =head1 NAME
  615. Directory::Queue::Normal - object oriented interface to a normal directory based queue
  616. =head1 SYNOPSIS
  617. use Directory::Queue::Normal;
  618. #
  619. # simple schema:
  620. # - there must be a "body" which is a string
  621. # - there can be a "header" which is a table/hash
  622. #
  623. $schema = { "body" => "string", "header" => "table?" };
  624. $queuedir = "/tmp/test";
  625. #
  626. # sample producer
  627. #
  628. $dirq = Directory::Queue::Normal->new(path => $queuedir, schema => $schema);
  629. foreach $count (1 .. 100) {
  630. $name = $dirq->add(body => "element $count\n", header => \%ENV);
  631. printf("# added element %d as %s\n", $count, $name);
  632. }
  633. #
  634. # sample consumer (one pass only)
  635. #
  636. $dirq = Directory::Queue::Normal->new(path => $queuedir, schema => $schema);
  637. for ($name = $dirq->first(); $name; $name = $dirq->next()) {
  638. next unless $dirq->lock($name);
  639. printf("# reading element %s\n", $name);
  640. %data = $dirq->get($name);
  641. # one can use $data{body} and $data{header} here...
  642. # one could use $dirq->unlock($name) to only browse the queue...
  643. $dirq->remove($name);
  644. }
  645. #
  646. # looping consumer (sleeping to avoid using all CPU time)
  647. #
  648. $dirq = Directory::Queue::Normal->new(path => $queuedir, schema => $schema);
  649. while (1) {
  650. sleep(1) unless $dirq->count();
  651. for ($name = $dirq->first(); $name; $name = $dirq->next()) {
  652. ... same as above ...
  653. }
  654. }
  655. =head1 DESCRIPTION
  656. The goal of this module is to offer a "normal" (as opposed to
  657. "simple") queue system using the underlying filesystem for storage,
  658. security and to prevent race conditions via atomic operations.
  659. It allows arbitrary data to be stored (see the L</SCHEMA> section for
  660. more information) but it has a significant disk space and speed
  661. overhead.
  662. Please refer to L<Directory::Queue> for general information about
  663. directory queues.
  664. =head1 CONSTRUCTOR
  665. The new() method can be used to create a Directory::Queue::Normal
  666. object that will later be used to interact with the queue. The
  667. following attributes are supported:
  668. =over
  669. =item path
  670. the queue toplevel directory (mandatory)
  671. =item umask
  672. the umask to use when creating files and directories
  673. (default: use the running process' umask)
  674. =item maxelts
  675. the maximum number of elements that an intermediate directory can hold
  676. (default: 16,000)
  677. =item schema
  678. the schema defining how to interpret user supplied data
  679. (mandatory if elements are added or read)
  680. =back
  681. =head1 SCHEMA
  682. The schema defines how user supplied data is stored in the queue. It
  683. is only required by the add() and get() methods.
  684. The schema must be a reference to a hash containing key/value pairs.
  685. The key must contain only alphanumerical characters. It identifies the
  686. piece of data and will be used as file name when storing the data
  687. inside the element directory.
  688. The value represents the type of the given piece of data. It can be:
  689. =over
  690. =item binary
  691. the data is a binary string (i.e. a sequence of bytes), it will be
  692. stored directly in a plain file with no further encoding
  693. =item string
  694. the data is a text string (i.e. a sequence of characters), it will be
  695. UTF-8 encoded before being stored in a file
  696. =item table
  697. the data is a reference to a hash of text strings, it will be
  698. serialized and UTF-8 encoded before being stored in a file
  699. =back
  700. By default, all pieces of data are mandatory. If you append a question
  701. mark to the type, this piece of data will be marked as optional. See
  702. the comments in the L</SYNOPSIS> section for an example.
  703. By default, string or binary data is used directly. If you append an
  704. asterisk to the type, the data that you add or get will be by
  705. reference. This can be useful to avoid string copies of large amounts
  706. of data.
  707. =head1 METHODS
  708. The following methods are available:
  709. =over
  710. =item new()
  711. return a new Directory::Queue::Normal object (class method)
  712. =item copy()
  713. return a copy of the object; this can be useful to have independent
  714. iterators on the same queue
  715. =item path()
  716. return the queue toplevel path
  717. =item id()
  718. return a unique identifier for the queue
  719. =item count()
  720. return the number of elements in the queue
  721. =item first()
  722. return the first element in the queue, resetting the iterator;
  723. return an empty string if the queue is empty
  724. =item next()
  725. return the next element in the queue, incrementing the iterator;
  726. return an empty string if there is no next element
  727. =item add(DATA)
  728. add the given data (a hash or hash reference) to the queue and return
  729. the corresponding element name; the schema must be known and the data
  730. must conform to it
  731. =item lock(ELEMENT[, PERMISSIVE])
  732. attempt to lock the given element and return true on success; if the
  733. PERMISSIVE option is true (which is the default), it is not a fatal
  734. error if the element cannot be locked and false is returned
  735. =item unlock(ELEMENT[, PERMISSIVE])
  736. attempt to unlock the given element and return true on success; if the
  737. PERMISSIVE option is true (which is I<not> the default), it is not a
  738. fatal error if the element cannot be unlocked and false is returned
  739. =item touch(ELEMENT)
  740. update the access and modification times on the element's directory to
  741. indicate that it is still being used; this is useful for elements that
  742. are locked for long periods of time (see the purge() method)
  743. =item remove(ELEMENT)
  744. remove the given element (which must be locked) from the queue
  745. =item get(ELEMENT)
  746. get the data from the given element (which must be locked) and return
  747. basically the same hash as what add() got (in list context, the hash
  748. is returned directly while in scalar context, the hash reference is
  749. returned instead); the schema must be knownand the data must conform
  750. to it
  751. =item purge([OPTIONS])
  752. purge the queue by removing unused intermediate directories, removing
  753. too old temporary elements and unlocking too old locked elements (aka
  754. staled locks); note: this can take a long time on queues with many
  755. elements; OPTIONS can be:
  756. =over
  757. =item maxtemp
  758. maximum time for a temporary element (in seconds, default 300);
  759. if set to 0, temporary elements will not be removed
  760. =item maxlock
  761. maximum time for a locked element (in seconds, default 600);
  762. if set to 0, locked elements will not be unlocked
  763. =back
  764. =back
  765. =head1 DIRECTORY STRUCTURE
  766. All the directories holding the elements and all the files holding the
  767. data pieces are located under the queue toplevel directory. This
  768. directory can contain:
  769. =over
  770. =item temporary
  771. the directory holding temporary elements, i.e. the elements being added
  772. =item obsolete
  773. the directory holding obsolete elements, i.e. the elements being removed
  774. =item I<NNNNNNNN>
  775. an intermediate directory holding elements; I<NNNNNNNN> is an 8-digits
  776. long hexadecimal number
  777. =back
  778. In any of the above directories, an element is stored as a single
  779. directory with a 14-digits long hexadecimal name I<SSSSSSSSMMMMMR> where:
  780. =over
  781. =item I<SSSSSSSS>
  782. represents the number of seconds since the Epoch
  783. =item I<MMMMM>
  784. represents the microsecond part of the time since the Epoch
  785. =item I<R>
  786. is a random digit used to reduce name collisions
  787. =back
  788. Finally, inside an element directory, the different pieces of data are
  789. stored into different files, named according to the schema. A locked
  790. element contains in addition a directory named C<locked>.
  791. =head1 SEE ALSO
  792. L<Directory::Queue>.
  793. =head1 AUTHOR
  794. Lionel Cons L<http://cern.ch/lionel.cons>
  795. Copyright CERN 2010-2012