PageRenderTime 56ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/perl/Git/SVN.pm

http://github.com/git/git
Perl | 2557 lines | 2226 code | 185 blank | 146 comment | 300 complexity | 19371abf3990673b9f08d1b478df126d MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, BSD-2-Clause

Large files are truncated, but you can click here to view the full file

  1. package Git::SVN;
  2. use strict;
  3. use warnings;
  4. use Fcntl qw/:DEFAULT :seek/;
  5. use constant rev_map_fmt => 'NH40';
  6. use vars qw/$_no_metadata
  7. $_repack $_repack_flags $_use_svm_props $_head
  8. $_use_svnsync_props $no_reuse_existing
  9. $_use_log_author $_add_author_from $_localtime/;
  10. use Carp qw/croak/;
  11. use File::Path qw/mkpath/;
  12. use IPC::Open3;
  13. use Memoize; # core since 5.8.0, Jul 2002
  14. use POSIX qw(:signal_h);
  15. use Time::Local;
  16. use Git qw(
  17. command
  18. command_oneline
  19. command_noisy
  20. command_output_pipe
  21. command_close_pipe
  22. get_tz_offset
  23. );
  24. use Git::SVN::Utils qw(
  25. fatal
  26. can_compress
  27. join_paths
  28. canonicalize_path
  29. canonicalize_url
  30. add_path_to_url
  31. );
  32. my $memo_backend;
  33. our $_follow_parent = 1;
  34. our $_minimize_url = 'unset';
  35. our $default_repo_id = 'svn';
  36. our $default_ref_id = $ENV{GIT_SVN_ID} || 'git-svn';
  37. my ($_gc_nr, $_gc_period);
  38. # properties that we do not log:
  39. my %SKIP_PROP;
  40. BEGIN {
  41. %SKIP_PROP = map { $_ => 1 } qw/svn:wc:ra_dav:version-url
  42. svn:special svn:executable
  43. svn:entry:committed-rev
  44. svn:entry:last-author
  45. svn:entry:uuid
  46. svn:entry:committed-date/;
  47. # some options are read globally, but can be overridden locally
  48. # per [svn-remote "..."] section. Command-line options will *NOT*
  49. # override options set in an [svn-remote "..."] section
  50. no strict 'refs';
  51. for my $option (qw/follow_parent no_metadata use_svm_props
  52. use_svnsync_props/) {
  53. my $key = $option;
  54. $key =~ tr/_//d;
  55. my $prop = "-$option";
  56. *$option = sub {
  57. my ($self) = @_;
  58. return $self->{$prop} if exists $self->{$prop};
  59. my $k = "svn-remote.$self->{repo_id}.$key";
  60. eval { command_oneline(qw/config --get/, $k) };
  61. if ($@) {
  62. $self->{$prop} = ${"Git::SVN::_$option"};
  63. } else {
  64. my $v = command_oneline(qw/config --bool/,$k);
  65. $self->{$prop} = $v eq 'false' ? 0 : 1;
  66. }
  67. return $self->{$prop};
  68. }
  69. }
  70. }
  71. my (%LOCKFILES, %INDEX_FILES);
  72. END {
  73. unlink keys %LOCKFILES if %LOCKFILES;
  74. unlink keys %INDEX_FILES if %INDEX_FILES;
  75. }
  76. sub resolve_local_globs {
  77. my ($url, $fetch, $glob_spec) = @_;
  78. return unless defined $glob_spec;
  79. my $ref = $glob_spec->{ref};
  80. my $path = $glob_spec->{path};
  81. foreach (command(qw#for-each-ref --format=%(refname) refs/#)) {
  82. next unless m#^$ref->{regex}$#;
  83. my $p = $1;
  84. my $pathname = desanitize_refname($path->full_path($p));
  85. my $refname = desanitize_refname($ref->full_path($p));
  86. if (my $existing = $fetch->{$pathname}) {
  87. if ($existing ne $refname) {
  88. die "Refspec conflict:\n",
  89. "existing: $existing\n",
  90. " globbed: $refname\n";
  91. }
  92. my $u = (::cmt_metadata("$refname"))[0];
  93. if (!defined($u)) {
  94. warn
  95. "W: $refname: no associated commit metadata from SVN, skipping\n";
  96. next;
  97. }
  98. $u =~ s!^\Q$url\E(/|$)!! or die
  99. "$refname: '$url' not found in '$u'\n";
  100. if ($pathname ne $u) {
  101. warn "W: Refspec glob conflict ",
  102. "(ref: $refname):\n",
  103. "expected path: $pathname\n",
  104. " real path: $u\n",
  105. "Continuing ahead with $u\n";
  106. next;
  107. }
  108. } else {
  109. $fetch->{$pathname} = $refname;
  110. }
  111. }
  112. }
  113. sub parse_revision_argument {
  114. my ($base, $head) = @_;
  115. if (!defined $::_revision || $::_revision eq 'BASE:HEAD') {
  116. return ($base, $head);
  117. }
  118. return ($1, $2) if ($::_revision =~ /^(\d+):(\d+)$/);
  119. return ($::_revision, $::_revision) if ($::_revision =~ /^\d+$/);
  120. return ($head, $head) if ($::_revision eq 'HEAD');
  121. return ($base, $1) if ($::_revision =~ /^BASE:(\d+)$/);
  122. return ($1, $head) if ($::_revision =~ /^(\d+):HEAD$/);
  123. die "revision argument: $::_revision not understood by git-svn\n";
  124. }
  125. sub fetch_all {
  126. my ($repo_id, $remotes) = @_;
  127. if (ref $repo_id) {
  128. my $gs = $repo_id;
  129. $repo_id = undef;
  130. $repo_id = $gs->{repo_id};
  131. }
  132. $remotes ||= read_all_remotes();
  133. my $remote = $remotes->{$repo_id} or
  134. die "[svn-remote \"$repo_id\"] unknown\n";
  135. my $fetch = $remote->{fetch};
  136. my $url = $remote->{url} or die "svn-remote.$repo_id.url not defined\n";
  137. my (@gs, @globs);
  138. my $ra = Git::SVN::Ra->new($url);
  139. my $uuid = $ra->get_uuid;
  140. my $head = $ra->get_latest_revnum;
  141. # ignore errors, $head revision may not even exist anymore
  142. eval { $ra->get_log("", $head, 0, 1, 0, 1, sub { $head = $_[1] }) };
  143. warn "W: $@\n" if $@;
  144. my $base = defined $fetch ? $head : 0;
  145. # read the max revs for wildcard expansion (branches/*, tags/*)
  146. foreach my $t (qw/branches tags/) {
  147. defined $remote->{$t} or next;
  148. push @globs, @{$remote->{$t}};
  149. my $max_rev = eval { tmp_config(qw/--int --get/,
  150. "svn-remote.$repo_id.${t}-maxRev") };
  151. if (defined $max_rev && ($max_rev < $base)) {
  152. $base = $max_rev;
  153. } elsif (!defined $max_rev) {
  154. $base = 0;
  155. }
  156. }
  157. if ($fetch) {
  158. foreach my $p (sort keys %$fetch) {
  159. my $gs = Git::SVN->new($fetch->{$p}, $repo_id, $p);
  160. my $lr = $gs->rev_map_max;
  161. if (defined $lr) {
  162. $base = $lr if ($lr < $base);
  163. }
  164. push @gs, $gs;
  165. }
  166. }
  167. ($base, $head) = parse_revision_argument($base, $head);
  168. $ra->gs_fetch_loop_common($base, $head, \@gs, \@globs);
  169. }
  170. sub read_all_remotes {
  171. my $r = {};
  172. my $use_svm_props = eval { command_oneline(qw/config --bool
  173. svn.useSvmProps/) };
  174. $use_svm_props = $use_svm_props eq 'true' if $use_svm_props;
  175. my $svn_refspec = qr{\s*(.*?)\s*:\s*(.+?)\s*};
  176. foreach (grep { s/^svn-remote\.// } command(qw/config -l/)) {
  177. if (m!^(.+)\.fetch=$svn_refspec$!) {
  178. my ($remote, $local_ref, $remote_ref) = ($1, $2, $3);
  179. die("svn-remote.$remote: remote ref '$remote_ref' "
  180. . "must start with 'refs/'\n")
  181. unless $remote_ref =~ m{^refs/};
  182. $local_ref = uri_decode($local_ref);
  183. $r->{$remote}->{fetch}->{$local_ref} = $remote_ref;
  184. $r->{$remote}->{svm} = {} if $use_svm_props;
  185. } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) {
  186. $r->{$1}->{svm} = {};
  187. } elsif (m!^(.+)\.url=\s*(.*)\s*$!) {
  188. $r->{$1}->{url} = canonicalize_url($2);
  189. } elsif (m!^(.+)\.pushurl=\s*(.*)\s*$!) {
  190. $r->{$1}->{pushurl} = canonicalize_url($2);
  191. } elsif (m!^(.+)\.ignore-refs=\s*(.*)\s*$!) {
  192. $r->{$1}->{ignore_refs_regex} = $2;
  193. } elsif (m!^(.+)\.(branches|tags)=$svn_refspec$!) {
  194. my ($remote, $t, $local_ref, $remote_ref) =
  195. ($1, $2, $3, $4);
  196. die("svn-remote.$remote: remote ref '$remote_ref' ($t) "
  197. . "must start with 'refs/'\n")
  198. unless $remote_ref =~ m{^refs/};
  199. $local_ref = uri_decode($local_ref);
  200. require Git::SVN::GlobSpec;
  201. my $rs = {
  202. t => $t,
  203. remote => $remote,
  204. path => Git::SVN::GlobSpec->new($local_ref, 1),
  205. ref => Git::SVN::GlobSpec->new($remote_ref, 0) };
  206. if (length($rs->{ref}->{right}) != 0) {
  207. die "The '*' glob character must be the last ",
  208. "character of '$remote_ref'\n";
  209. }
  210. push @{ $r->{$remote}->{$t} }, $rs;
  211. }
  212. }
  213. map {
  214. if (defined $r->{$_}->{svm}) {
  215. my $svm;
  216. eval {
  217. my $section = "svn-remote.$_";
  218. $svm = {
  219. source => tmp_config('--get',
  220. "$section.svm-source"),
  221. replace => tmp_config('--get',
  222. "$section.svm-replace"),
  223. }
  224. };
  225. $r->{$_}->{svm} = $svm;
  226. }
  227. } keys %$r;
  228. foreach my $remote (keys %$r) {
  229. foreach ( grep { defined $_ }
  230. map { $r->{$remote}->{$_} } qw(branches tags) ) {
  231. foreach my $rs ( @$_ ) {
  232. $rs->{ignore_refs_regex} =
  233. $r->{$remote}->{ignore_refs_regex};
  234. }
  235. }
  236. }
  237. $r;
  238. }
  239. sub init_vars {
  240. $_gc_nr = $_gc_period = 1000;
  241. if (defined $_repack || defined $_repack_flags) {
  242. warn "Repack options are obsolete; they have no effect.\n";
  243. }
  244. }
  245. sub verify_remotes_sanity {
  246. return unless -d $ENV{GIT_DIR};
  247. my %seen;
  248. foreach (command(qw/config -l/)) {
  249. if (m!^svn-remote\.(?:.+)\.fetch=.*:refs/remotes/(\S+)\s*$!) {
  250. if ($seen{$1}) {
  251. die "Remote ref refs/remote/$1 is tracked by",
  252. "\n \"$_\"\nand\n \"$seen{$1}\"\n",
  253. "Please resolve this ambiguity in ",
  254. "your git configuration file before ",
  255. "continuing\n";
  256. }
  257. $seen{$1} = $_;
  258. }
  259. }
  260. }
  261. sub find_existing_remote {
  262. my ($url, $remotes) = @_;
  263. return undef if $no_reuse_existing;
  264. my $existing;
  265. foreach my $repo_id (keys %$remotes) {
  266. my $u = $remotes->{$repo_id}->{url} or next;
  267. next if $u ne $url;
  268. $existing = $repo_id;
  269. last;
  270. }
  271. $existing;
  272. }
  273. sub init_remote_config {
  274. my ($self, $url, $no_write) = @_;
  275. $url = canonicalize_url($url);
  276. my $r = read_all_remotes();
  277. my $existing = find_existing_remote($url, $r);
  278. if ($existing) {
  279. unless ($no_write) {
  280. print STDERR "Using existing ",
  281. "[svn-remote \"$existing\"]\n";
  282. }
  283. $self->{repo_id} = $existing;
  284. } elsif ($_minimize_url) {
  285. my $min_url = Git::SVN::Ra->new($url)->minimize_url;
  286. $existing = find_existing_remote($min_url, $r);
  287. if ($existing) {
  288. unless ($no_write) {
  289. print STDERR "Using existing ",
  290. "[svn-remote \"$existing\"]\n";
  291. }
  292. $self->{repo_id} = $existing;
  293. }
  294. if ($min_url ne $url) {
  295. unless ($no_write) {
  296. print STDERR "Using higher level of URL: ",
  297. "$url => $min_url\n";
  298. }
  299. my $old_path = $self->path;
  300. $url =~ s!^\Q$min_url\E(/|$)!!;
  301. $url = join_paths($url, $old_path);
  302. $self->path($url);
  303. $url = $min_url;
  304. }
  305. }
  306. my $orig_url;
  307. if (!$existing) {
  308. # verify that we aren't overwriting anything:
  309. $orig_url = eval {
  310. command_oneline('config', '--get',
  311. "svn-remote.$self->{repo_id}.url")
  312. };
  313. if ($orig_url && ($orig_url ne $url)) {
  314. die "svn-remote.$self->{repo_id}.url already set: ",
  315. "$orig_url\nwanted to set to: $url\n";
  316. }
  317. }
  318. my ($xrepo_id, $xpath) = find_ref($self->refname);
  319. if (!$no_write && defined $xpath) {
  320. die "svn-remote.$xrepo_id.fetch already set to track ",
  321. "$xpath:", $self->refname, "\n";
  322. }
  323. unless ($no_write) {
  324. command_noisy('config',
  325. "svn-remote.$self->{repo_id}.url", $url);
  326. my $path = $self->path;
  327. $path =~ s{^/}{};
  328. $path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
  329. $self->path($path);
  330. command_noisy('config', '--add',
  331. "svn-remote.$self->{repo_id}.fetch",
  332. $self->path.":".$self->refname);
  333. }
  334. $self->url($url);
  335. }
  336. sub find_by_url { # repos_root and, path are optional
  337. my ($class, $full_url, $repos_root, $path) = @_;
  338. $full_url = canonicalize_url($full_url);
  339. return undef unless defined $full_url;
  340. remove_username($full_url);
  341. remove_username($repos_root) if defined $repos_root;
  342. my $remotes = read_all_remotes();
  343. if (defined $full_url && defined $repos_root && !defined $path) {
  344. $path = $full_url;
  345. $path =~ s#^\Q$repos_root\E(?:/|$)##;
  346. }
  347. foreach my $repo_id (keys %$remotes) {
  348. my $u = $remotes->{$repo_id}->{url} or next;
  349. remove_username($u);
  350. next if defined $repos_root && $repos_root ne $u;
  351. my $fetch = $remotes->{$repo_id}->{fetch} || {};
  352. foreach my $t (qw/branches tags/) {
  353. foreach my $globspec (@{$remotes->{$repo_id}->{$t}}) {
  354. resolve_local_globs($u, $fetch, $globspec);
  355. }
  356. }
  357. my $p = $path;
  358. my $rwr = rewrite_root({repo_id => $repo_id});
  359. my $svm = $remotes->{$repo_id}->{svm}
  360. if defined $remotes->{$repo_id}->{svm};
  361. unless (defined $p) {
  362. $p = $full_url;
  363. my $z = $u;
  364. my $prefix = '';
  365. if ($rwr) {
  366. $z = $rwr;
  367. remove_username($z);
  368. } elsif (defined $svm) {
  369. $z = $svm->{source};
  370. $prefix = $svm->{replace};
  371. $prefix =~ s#^\Q$u\E(?:/|$)##;
  372. $prefix =~ s#/$##;
  373. }
  374. $p =~ s#^\Q$z\E(?:/|$)#$prefix# or next;
  375. }
  376. # remote fetch paths are not URI escaped. Decode ours
  377. # so they match
  378. $p = uri_decode($p);
  379. foreach my $f (keys %$fetch) {
  380. next if $f ne $p;
  381. return Git::SVN->new($fetch->{$f}, $repo_id, $f);
  382. }
  383. }
  384. undef;
  385. }
  386. sub init {
  387. my ($class, $url, $path, $repo_id, $ref_id, $no_write) = @_;
  388. my $self = _new($class, $repo_id, $ref_id, $path);
  389. if (defined $url) {
  390. $self->init_remote_config($url, $no_write);
  391. }
  392. $self;
  393. }
  394. sub find_ref {
  395. my ($ref_id) = @_;
  396. foreach (command(qw/config -l/)) {
  397. next unless m!^svn-remote\.(.+)\.fetch=
  398. \s*(.*?)\s*:\s*(.+?)\s*$!x;
  399. my ($repo_id, $path, $ref) = ($1, $2, $3);
  400. if ($ref eq $ref_id) {
  401. $path = '' if ($path =~ m#^\./?#);
  402. return ($repo_id, $path);
  403. }
  404. }
  405. (undef, undef, undef);
  406. }
  407. sub new {
  408. my ($class, $ref_id, $repo_id, $path) = @_;
  409. if (defined $ref_id && !defined $repo_id && !defined $path) {
  410. ($repo_id, $path) = find_ref($ref_id);
  411. if (!defined $repo_id) {
  412. die "Could not find a \"svn-remote.*.fetch\" key ",
  413. "in the repository configuration matching: ",
  414. "$ref_id\n";
  415. }
  416. }
  417. my $self = _new($class, $repo_id, $ref_id, $path);
  418. if (!defined $self->path || !length $self->path) {
  419. my $fetch = command_oneline('config', '--get',
  420. "svn-remote.$repo_id.fetch",
  421. ":$ref_id\$") or
  422. die "Failed to read \"svn-remote.$repo_id.fetch\" ",
  423. "\":$ref_id\$\" in config\n";
  424. my($path) = split(/\s*:\s*/, $fetch);
  425. $self->path($path);
  426. }
  427. {
  428. my $path = $self->path;
  429. $path =~ s{\A/}{};
  430. $path =~ s{/\z}{};
  431. $self->path($path);
  432. }
  433. my $url = command_oneline('config', '--get',
  434. "svn-remote.$repo_id.url") or
  435. die "Failed to read \"svn-remote.$repo_id.url\" in config\n";
  436. $self->url($url);
  437. $self->{pushurl} = eval { command_oneline('config', '--get',
  438. "svn-remote.$repo_id.pushurl") };
  439. $self->rebuild;
  440. $self;
  441. }
  442. sub refname {
  443. my ($refname) = $_[0]->{ref_id} ;
  444. # It cannot end with a slash /, we'll throw up on this because
  445. # SVN can't have directories with a slash in their name, either:
  446. if ($refname =~ m{/$}) {
  447. die "ref: '$refname' ends with a trailing slash; this is ",
  448. "not permitted by git or Subversion\n";
  449. }
  450. # It cannot have ASCII control character space, tilde ~, caret ^,
  451. # colon :, question-mark ?, asterisk *, space, or open bracket [
  452. # anywhere.
  453. #
  454. # Additionally, % must be escaped because it is used for escaping
  455. # and we want our escaped refname to be reversible
  456. $refname =~ s{([ \%~\^:\?\*\[\t\\])}{sprintf('%%%02X',ord($1))}eg;
  457. # no slash-separated component can begin with a dot .
  458. # /.* becomes /%2E*
  459. $refname =~ s{/\.}{/%2E}g;
  460. # It cannot have two consecutive dots .. anywhere
  461. # .. becomes %2E%2E
  462. $refname =~ s{\.\.}{%2E%2E}g;
  463. # trailing dots and .lock are not allowed
  464. # .$ becomes %2E and .lock becomes %2Elock
  465. $refname =~ s{\.(?=$|lock$)}{%2E};
  466. # the sequence @{ is used to access the reflog
  467. # @{ becomes %40{
  468. $refname =~ s{\@\{}{%40\{}g;
  469. return $refname;
  470. }
  471. sub desanitize_refname {
  472. my ($refname) = @_;
  473. $refname =~ s{%(?:([0-9A-F]{2}))}{chr hex($1)}eg;
  474. return $refname;
  475. }
  476. sub svm_uuid {
  477. my ($self) = @_;
  478. return $self->{svm}->{uuid} if $self->svm;
  479. $self->ra;
  480. unless ($self->{svm}) {
  481. die "SVM UUID not cached, and reading remotely failed\n";
  482. }
  483. $self->{svm}->{uuid};
  484. }
  485. sub svm {
  486. my ($self) = @_;
  487. return $self->{svm} if $self->{svm};
  488. my $svm;
  489. # see if we have it in our config, first:
  490. eval {
  491. my $section = "svn-remote.$self->{repo_id}";
  492. $svm = {
  493. source => tmp_config('--get', "$section.svm-source"),
  494. uuid => tmp_config('--get', "$section.svm-uuid"),
  495. replace => tmp_config('--get', "$section.svm-replace"),
  496. }
  497. };
  498. if ($svm && $svm->{source} && $svm->{uuid} && $svm->{replace}) {
  499. $self->{svm} = $svm;
  500. }
  501. $self->{svm};
  502. }
  503. sub _set_svm_vars {
  504. my ($self, $ra) = @_;
  505. return $ra if $self->svm;
  506. my @err = ( "useSvmProps set, but failed to read SVM properties\n",
  507. "(svm:source, svm:uuid) ",
  508. "from the following URLs:\n" );
  509. sub read_svm_props {
  510. my ($self, $ra, $path, $r) = @_;
  511. my $props = ($ra->get_dir($path, $r))[2];
  512. my $src = $props->{'svm:source'};
  513. my $uuid = $props->{'svm:uuid'};
  514. return undef if (!$src || !$uuid);
  515. chomp($src, $uuid);
  516. $uuid =~ m{^[0-9a-f\-]{30,}$}i
  517. or die "doesn't look right - svm:uuid is '$uuid'\n";
  518. # the '!' is used to mark the repos_root!/relative/path
  519. $src =~ s{/?!/?}{/};
  520. $src =~ s{/+$}{}; # no trailing slashes please
  521. # username is of no interest
  522. $src =~ s{(^[a-z\+]*://)[^/@]*@}{$1};
  523. my $replace = add_path_to_url($ra->url, $path);
  524. my $section = "svn-remote.$self->{repo_id}";
  525. tmp_config("$section.svm-source", $src);
  526. tmp_config("$section.svm-replace", $replace);
  527. tmp_config("$section.svm-uuid", $uuid);
  528. $self->{svm} = {
  529. source => $src,
  530. uuid => $uuid,
  531. replace => $replace
  532. };
  533. }
  534. my $r = $ra->get_latest_revnum;
  535. my $path = $self->path;
  536. my %tried;
  537. while (length $path) {
  538. my $try = add_path_to_url($self->url, $path);
  539. unless ($tried{$try}) {
  540. return $ra if $self->read_svm_props($ra, $path, $r);
  541. $tried{$try} = 1;
  542. }
  543. $path =~ s#/?[^/]+$##;
  544. }
  545. die "Path: '$path' should be ''\n" if $path ne '';
  546. return $ra if $self->read_svm_props($ra, $path, $r);
  547. $tried{ add_path_to_url($self->url, $path) } = 1;
  548. if ($ra->{repos_root} eq $self->url) {
  549. die @err, (map { " $_\n" } keys %tried), "\n";
  550. }
  551. # nope, make sure we're connected to the repository root:
  552. my $ok;
  553. my @tried_b;
  554. $path = $ra->{svn_path};
  555. $ra = Git::SVN::Ra->new($ra->{repos_root});
  556. while (length $path) {
  557. my $try = add_path_to_url($ra->url, $path);
  558. unless ($tried{$try}) {
  559. $ok = $self->read_svm_props($ra, $path, $r);
  560. last if $ok;
  561. $tried{$try} = 1;
  562. }
  563. $path =~ s#/?[^/]+$##;
  564. }
  565. die "Path: '$path' should be ''\n" if $path ne '';
  566. $ok ||= $self->read_svm_props($ra, $path, $r);
  567. $tried{ add_path_to_url($ra->url, $path) } = 1;
  568. if (!$ok) {
  569. die @err, (map { " $_\n" } keys %tried), "\n";
  570. }
  571. Git::SVN::Ra->new($self->url);
  572. }
  573. sub svnsync {
  574. my ($self) = @_;
  575. return $self->{svnsync} if $self->{svnsync};
  576. if ($self->no_metadata) {
  577. die "Can't have both 'noMetadata' and ",
  578. "'useSvnsyncProps' options set!\n";
  579. }
  580. if ($self->rewrite_root) {
  581. die "Can't have both 'useSvnsyncProps' and 'rewriteRoot' ",
  582. "options set!\n";
  583. }
  584. if ($self->rewrite_uuid) {
  585. die "Can't have both 'useSvnsyncProps' and 'rewriteUUID' ",
  586. "options set!\n";
  587. }
  588. my $svnsync;
  589. # see if we have it in our config, first:
  590. eval {
  591. my $section = "svn-remote.$self->{repo_id}";
  592. my $url = tmp_config('--get', "$section.svnsync-url");
  593. ($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
  594. die "doesn't look right - svn:sync-from-url is '$url'\n";
  595. my $uuid = tmp_config('--get', "$section.svnsync-uuid");
  596. ($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
  597. die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
  598. $svnsync = { url => $url, uuid => $uuid }
  599. };
  600. if ($svnsync && $svnsync->{url} && $svnsync->{uuid}) {
  601. return $self->{svnsync} = $svnsync;
  602. }
  603. my $err = "useSvnsyncProps set, but failed to read " .
  604. "svnsync property: svn:sync-from-";
  605. my $rp = $self->ra->rev_proplist(0);
  606. my $url = $rp->{'svn:sync-from-url'} or die $err . "url\n";
  607. ($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
  608. die "doesn't look right - svn:sync-from-url is '$url'\n";
  609. my $uuid = $rp->{'svn:sync-from-uuid'} or die $err . "uuid\n";
  610. ($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
  611. die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
  612. my $section = "svn-remote.$self->{repo_id}";
  613. tmp_config('--add', "$section.svnsync-uuid", $uuid);
  614. tmp_config('--add', "$section.svnsync-url", $url);
  615. return $self->{svnsync} = { url => $url, uuid => $uuid };
  616. }
  617. # this allows us to memoize our SVN::Ra UUID locally and avoid a
  618. # remote lookup (useful for 'git svn log').
  619. sub ra_uuid {
  620. my ($self) = @_;
  621. unless ($self->{ra_uuid}) {
  622. my $key = "svn-remote.$self->{repo_id}.uuid";
  623. my $uuid = eval { tmp_config('--get', $key) };
  624. if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/i) {
  625. $self->{ra_uuid} = $uuid;
  626. } else {
  627. die "ra_uuid called without URL\n" unless $self->url;
  628. $self->{ra_uuid} = $self->ra->get_uuid;
  629. tmp_config('--add', $key, $self->{ra_uuid});
  630. }
  631. }
  632. $self->{ra_uuid};
  633. }
  634. sub _set_repos_root {
  635. my ($self, $repos_root) = @_;
  636. my $k = "svn-remote.$self->{repo_id}.reposRoot";
  637. $repos_root ||= $self->ra->{repos_root};
  638. tmp_config($k, $repos_root);
  639. $repos_root;
  640. }
  641. sub repos_root {
  642. my ($self) = @_;
  643. my $k = "svn-remote.$self->{repo_id}.reposRoot";
  644. eval { tmp_config('--get', $k) } || $self->_set_repos_root;
  645. }
  646. sub ra {
  647. my ($self) = shift;
  648. my $ra = Git::SVN::Ra->new($self->url);
  649. $self->_set_repos_root($ra->{repos_root});
  650. if ($self->use_svm_props && !$self->{svm}) {
  651. if ($self->no_metadata) {
  652. die "Can't have both 'noMetadata' and ",
  653. "'useSvmProps' options set!\n";
  654. } elsif ($self->use_svnsync_props) {
  655. die "Can't have both 'useSvnsyncProps' and ",
  656. "'useSvmProps' options set!\n";
  657. }
  658. $ra = $self->_set_svm_vars($ra);
  659. $self->{-want_revprops} = 1;
  660. }
  661. $ra;
  662. }
  663. # prop_walk(PATH, REV, SUB)
  664. # -------------------------
  665. # Recursively traverse PATH at revision REV and invoke SUB for each
  666. # directory that contains a SVN property. SUB will be invoked as
  667. # follows: &SUB(gs, path, props); where `gs' is this instance of
  668. # Git::SVN, `path' the path to the directory where the properties
  669. # `props' were found. The `path' will be relative to point of checkout,
  670. # that is, if url://repo/trunk is the current Git branch, and that
  671. # directory contains a sub-directory `d', SUB will be invoked with `/d/'
  672. # as `path' (note the trailing `/').
  673. sub prop_walk {
  674. my ($self, $path, $rev, $sub) = @_;
  675. $path =~ s#^/##;
  676. my ($dirent, undef, $props) = $self->ra->get_dir($path, $rev);
  677. $path =~ s#^/*#/#g;
  678. my $p = $path;
  679. # Strip the irrelevant part of the path.
  680. $p =~ s#^/+\Q@{[$self->path]}\E(/|$)#/#;
  681. # Ensure the path is terminated by a `/'.
  682. $p =~ s#/*$#/#;
  683. # The properties contain all the internal SVN stuff nobody
  684. # (usually) cares about.
  685. my $interesting_props = 0;
  686. foreach (keys %{$props}) {
  687. # If it doesn't start with `svn:', it must be a
  688. # user-defined property.
  689. ++$interesting_props and next if $_ !~ /^svn:/;
  690. # FIXME: Fragile, if SVN adds new public properties,
  691. # this needs to be updated.
  692. ++$interesting_props if /^svn:(?:ignore|keywords|executable
  693. |eol-style|mime-type
  694. |externals|needs-lock)$/x;
  695. }
  696. &$sub($self, $p, $props) if $interesting_props;
  697. foreach (sort keys %$dirent) {
  698. next if $dirent->{$_}->{kind} != $SVN::Node::dir;
  699. $self->prop_walk($self->path . $p . $_, $rev, $sub);
  700. }
  701. }
  702. sub last_rev { ($_[0]->last_rev_commit)[0] }
  703. sub last_commit { ($_[0]->last_rev_commit)[1] }
  704. # returns the newest SVN revision number and newest commit SHA1
  705. sub last_rev_commit {
  706. my ($self) = @_;
  707. if (defined $self->{last_rev} && defined $self->{last_commit}) {
  708. return ($self->{last_rev}, $self->{last_commit});
  709. }
  710. my $c = ::verify_ref($self->refname.'^0');
  711. if ($c && !$self->use_svm_props && !$self->no_metadata) {
  712. my $rev = (::cmt_metadata($c))[1];
  713. if (defined $rev) {
  714. ($self->{last_rev}, $self->{last_commit}) = ($rev, $c);
  715. return ($rev, $c);
  716. }
  717. }
  718. my $map_path = $self->map_path;
  719. unless (-e $map_path) {
  720. ($self->{last_rev}, $self->{last_commit}) = (undef, undef);
  721. return (undef, undef);
  722. }
  723. my ($rev, $commit) = $self->rev_map_max(1);
  724. ($self->{last_rev}, $self->{last_commit}) = ($rev, $commit);
  725. return ($rev, $commit);
  726. }
  727. sub get_fetch_range {
  728. my ($self, $min, $max) = @_;
  729. $max ||= $self->ra->get_latest_revnum;
  730. $min ||= $self->rev_map_max;
  731. (++$min, $max);
  732. }
  733. sub svn_dir {
  734. command_oneline(qw(rev-parse --git-path svn));
  735. }
  736. sub tmp_config {
  737. my (@args) = @_;
  738. my $svn_dir = svn_dir();
  739. my $old_def_config = "$svn_dir/config";
  740. my $config = "$svn_dir/.metadata";
  741. if (! -f $config && -f $old_def_config) {
  742. rename $old_def_config, $config or
  743. die "Failed rename $old_def_config => $config: $!\n";
  744. }
  745. my $old_config = $ENV{GIT_CONFIG};
  746. $ENV{GIT_CONFIG} = $config;
  747. $@ = undef;
  748. my @ret = eval {
  749. unless (-f $config) {
  750. mkfile($config);
  751. open my $fh, '>', $config or
  752. die "Can't open $config: $!\n";
  753. print $fh "; This file is used internally by ",
  754. "git-svn\n" or die
  755. "Couldn't write to $config: $!\n";
  756. print $fh "; You should not have to edit it\n" or
  757. die "Couldn't write to $config: $!\n";
  758. close $fh or die "Couldn't close $config: $!\n";
  759. }
  760. command('config', @args);
  761. };
  762. my $err = $@;
  763. if (defined $old_config) {
  764. $ENV{GIT_CONFIG} = $old_config;
  765. } else {
  766. delete $ENV{GIT_CONFIG};
  767. }
  768. die $err if $err;
  769. wantarray ? @ret : $ret[0];
  770. }
  771. sub tmp_index_do {
  772. my ($self, $sub) = @_;
  773. my $old_index = $ENV{GIT_INDEX_FILE};
  774. $ENV{GIT_INDEX_FILE} = $self->{index};
  775. $@ = undef;
  776. my @ret = eval {
  777. my ($dir, $base) = ($self->{index} =~ m#^(.*?)/?([^/]+)$#);
  778. mkpath([$dir]) unless -d $dir;
  779. &$sub;
  780. };
  781. my $err = $@;
  782. if (defined $old_index) {
  783. $ENV{GIT_INDEX_FILE} = $old_index;
  784. } else {
  785. delete $ENV{GIT_INDEX_FILE};
  786. }
  787. die $err if $err;
  788. wantarray ? @ret : $ret[0];
  789. }
  790. sub assert_index_clean {
  791. my ($self, $treeish) = @_;
  792. $self->tmp_index_do(sub {
  793. command_noisy('read-tree', $treeish) unless -e $self->{index};
  794. my $x = command_oneline('write-tree');
  795. my ($y) = (command(qw/cat-file commit/, $treeish) =~
  796. /^tree ($::sha1)/mo);
  797. return if $y eq $x;
  798. warn "Index mismatch: $y != $x\nrereading $treeish\n";
  799. unlink $self->{index} or die "unlink $self->{index}: $!\n";
  800. command_noisy('read-tree', $treeish);
  801. $x = command_oneline('write-tree');
  802. if ($y ne $x) {
  803. fatal "trees ($treeish) $y != $x\n",
  804. "Something is seriously wrong...";
  805. }
  806. });
  807. }
  808. sub get_commit_parents {
  809. my ($self, $log_entry) = @_;
  810. my (%seen, @ret, @tmp);
  811. # legacy support for 'set-tree'; this is only used by set_tree_cb:
  812. if (my $ip = $self->{inject_parents}) {
  813. if (my $commit = delete $ip->{$log_entry->{revision}}) {
  814. push @tmp, $commit;
  815. }
  816. }
  817. if (my $cur = ::verify_ref($self->refname.'^0')) {
  818. push @tmp, $cur;
  819. }
  820. if (my $ipd = $self->{inject_parents_dcommit}) {
  821. if (my $commit = delete $ipd->{$log_entry->{revision}}) {
  822. push @tmp, @$commit;
  823. }
  824. }
  825. push @tmp, $_ foreach (@{$log_entry->{parents}}, @tmp);
  826. while (my $p = shift @tmp) {
  827. next if $seen{$p};
  828. $seen{$p} = 1;
  829. push @ret, $p;
  830. }
  831. @ret;
  832. }
  833. sub rewrite_root {
  834. my ($self) = @_;
  835. return $self->{-rewrite_root} if exists $self->{-rewrite_root};
  836. my $k = "svn-remote.$self->{repo_id}.rewriteRoot";
  837. my $rwr = eval { command_oneline(qw/config --get/, $k) };
  838. if ($rwr) {
  839. $rwr =~ s#/+$##;
  840. if ($rwr !~ m#^[a-z\+]+://#) {
  841. die "$rwr is not a valid URL (key: $k)\n";
  842. }
  843. }
  844. $self->{-rewrite_root} = $rwr;
  845. }
  846. sub rewrite_uuid {
  847. my ($self) = @_;
  848. return $self->{-rewrite_uuid} if exists $self->{-rewrite_uuid};
  849. my $k = "svn-remote.$self->{repo_id}.rewriteUUID";
  850. my $rwid = eval { command_oneline(qw/config --get/, $k) };
  851. if ($rwid) {
  852. $rwid =~ s#/+$##;
  853. if ($rwid !~ m#^[a-f0-9]{8}-(?:[a-f0-9]{4}-){3}[a-f0-9]{12}$#) {
  854. die "$rwid is not a valid UUID (key: $k)\n";
  855. }
  856. }
  857. $self->{-rewrite_uuid} = $rwid;
  858. }
  859. sub metadata_url {
  860. my ($self) = @_;
  861. my $url = $self->rewrite_root || $self->url;
  862. return canonicalize_url( add_path_to_url( $url, $self->path ) );
  863. }
  864. sub full_url {
  865. my ($self) = @_;
  866. return canonicalize_url( add_path_to_url( $self->url, $self->path ) );
  867. }
  868. sub full_pushurl {
  869. my ($self) = @_;
  870. if ($self->{pushurl}) {
  871. return canonicalize_url( add_path_to_url( $self->{pushurl}, $self->path ) );
  872. } else {
  873. return $self->full_url;
  874. }
  875. }
  # Point the GIT_{AUTHOR,COMMITTER}_{NAME,EMAIL,DATE} environment variables
  # at the identity and date found in $log_entry.
  #
  # The committer name/email come from commit_name/commit_email when those
  # are defined and from name/email otherwise; both dates come from date.
  #
  # Returns a reference to a hash holding the six previous values (undef
  # for variables that were unset), suitable for restore_commit_header_env.
  sub set_commit_header_env {
      my ($log_entry) = @_;

      # Snapshot the current values before touching anything.
      my %saved;
      for my $role (qw/AUTHOR COMMITTER/) {
          for my $field (qw/NAME EMAIL DATE/) {
              my $var = "GIT_${role}_${field}";
              $saved{$var} = $ENV{$var};
          }
      }

      $ENV{GIT_AUTHOR_NAME}  = $log_entry->{name};
      $ENV{GIT_AUTHOR_EMAIL} = $log_entry->{email};
      $ENV{GIT_AUTHOR_DATE}  = $ENV{GIT_COMMITTER_DATE} = $log_entry->{date};

      my $committer_name = $log_entry->{commit_name};
      $committer_name = $log_entry->{name} unless defined $committer_name;
      $ENV{GIT_COMMITTER_NAME} = $committer_name;

      my $committer_email = $log_entry->{commit_email};
      $committer_email = $log_entry->{email} unless defined $committer_email;
      $ENV{GIT_COMMITTER_EMAIL} = $committer_email;

      return \%saved;
  }
  # Put the GIT_{AUTHOR,COMMITTER}_{NAME,EMAIL,DATE} environment variables
  # back to the values recorded in $env (a hash ref keyed by variable name).
  # A variable whose recorded value is undefined is removed from %ENV.
  sub restore_commit_header_env {
      my ($env) = @_;
      for my $field (qw/NAME EMAIL DATE/) {
          for my $role (qw/AUTHOR COMMITTER/) {
              my $var = "GIT_${role}_${field}";
              unless (defined $env->{$var}) {
                  delete $ENV{$var};
                  next;
              }
              $ENV{$var} = $env->{$var};
          }
      }
  }
  # Run "gc --auto" through command_noisy.
  sub gc {
      my @gc_args = ('gc', '--auto');
      command_noisy(@gc_args);
  }
  # Create a git commit for $log_entry with "git commit-tree" and record it
  # in the revision map.
  #
  # Steps, in order:
  #   * refuse to go backwards (revision <= last_rev) or to re-create a
  #     revision that already has an entry in the rev map;
  #   * export author/committer identity into the environment;
  #   * take the tree from $log_entry->{tree}, or write one from the
  #     temporary index when none was supplied;
  #   * feed the log message (re-encoded when i18n.commitencoding is set,
  #     plus a git-svn-id trailer unless no_metadata is on) to commit-tree;
  #   * store the new commit in the rev map, update last_rev/last_commit,
  #     report progress, and run gc() when the countdown in $_gc_nr hits 0.
  #
  # Returns the id of the new commit.  Dies/croaks on any failure.
  sub do_git_commit {
      my ($self, $log_entry) = @_;

      my $last_rev = $self->last_rev;
      if (defined $last_rev && $last_rev >= $log_entry->{revision}) {
          die "Last fetched revision of ", $self->refname,
              " was r$last_rev, but we are about to fetch: ",
              "r$log_entry->{revision}!\n";
      }
      my $existing = $self->rev_map_get($log_entry->{revision});
      if ($existing) {
          croak "$log_entry->{revision} = $existing already exists! ",
                "Why are we refetching it?\n";
      }

      my $saved_env = set_commit_header_env($log_entry);

      my $tree = $log_entry->{tree};
      unless (defined $tree) {
          $tree = $self->tmp_index_do(sub { command_oneline('write-tree') });
      }
      die "Tree is not a valid sha1: $tree\n" if $tree !~ /^$::sha1$/o;

      my @commit_cmd = ('git', 'commit-tree', $tree);
      push @commit_cmd,
           map { ('-p', $_) } $self->get_commit_parents($log_entry);

      # The child's stderr is sent to our STDERR; we write the message to
      # $msg_fh and read the resulting commit id from $out_fh.
      defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @commit_cmd))
          or croak $!;
      binmode $msg_fh;

      # we always get UTF-8 from SVN, but we may want our commits in
      # a different encoding.  Note that this rewrites $log_entry->{log}
      # in place.
      if (my $enc = Git::config('i18n.commitencoding')) {
          require Encode;
          Encode::from_to($log_entry->{log}, 'UTF-8', $enc);
      }
      print $msg_fh $log_entry->{log} or croak $!;

      # The child has been started by now, so our own environment can go
      # back to what it was.
      restore_commit_header_env($saved_env);

      unless ($self->no_metadata) {
          print $msg_fh "\ngit-svn-id: $log_entry->{metadata}\n"
              or croak $!;
      }
      $msg_fh->flush == 0 or croak $!;
      close $msg_fh or croak $!;

      chomp(my $commit = do { local $/; <$out_fh> });
      close $out_fh or croak $!;
      waitpid $pid, 0;
      croak $? if $?;
      if ($commit !~ /^$::sha1$/o) {
          die "Failed to commit, invalid sha1: $commit\n";
      }

      $self->rev_map_set($log_entry->{revision}, $commit, 1);
      $self->{last_rev}    = $log_entry->{revision};
      $self->{last_commit} = $commit;

      # Progress line: "r<rev> [(@<svm rev>)] = <commit> (<ref>)".
      print "r$log_entry->{revision}" unless $::_q > 1;
      if (defined $log_entry->{svm_revision}) {
          print " (\@$log_entry->{svm_revision})" unless $::_q > 1;
          $self->rev_map_set($log_entry->{svm_revision}, $commit,
                             0, $self->svm_uuid);
      }
      print " = $commit ($self->{ref_id})\n" unless $::_q > 1;

      if (--$_gc_nr == 0) {
          $_gc_nr = $_gc_period;
          gc();
      }
      return $commit;
  }
  # Decide whether the changed-path table $paths (keys are absolute
  # repository paths, values are hashes with an 'action' entry) affects the
  # path this object tracks.
  #
  # Returns 1 when:
  #   * the tracked path is empty;
  #   * the tracked path itself is listed with any action other than 'D';
  #   * some listed path lies underneath the tracked path; or
  #   * a leading part of the tracked path was added/replaced ('A'/'R') and
  #     check_path reports the tracked path as a directory at revision $r.
  # Returns 0 otherwise, including when the tracked path itself is listed
  # with action 'D'.
  sub match_paths {
      my ($self, $paths, $r) = @_;
      return 1 if $self->path eq '';

      my $own_entry = $paths->{"/" . $self->path};
      if ($own_entry) {
          return $own_entry->{action} eq 'D' ? 0 : 1;
      }

      # Compiled once per object and reused on later calls.
      $self->{path_regex} ||= qr{^/\Q@{[$self->path]}\E/};
      return 1 if grep { /$self->{path_regex}/ } keys %$paths;

      my $ancestor = '';
      for my $component (split m#/#, $self->path) {
          $ancestor .= "/$component";
          my $change = $paths->{$ancestor};
          next unless ($change && ($change->{action} =~ /^[AR]$/));
          return 1
              if $self->ra->check_path($self->path, $r) == $SVN::Node::dir;
      }
      return 0;
  }
  # Try to treat revision $rev of this ref as a copy ("branch") of another
  # path and, when that works, return a log entry whose parent is the
  # commit the copy was taken from.
  #
  # Arguments:
  #   $paths - changed-path table for $rev (values may carry copyfrom_path
  #            and copyfrom_rev); when undefined it is fetched with get_log.
  #   $rev   - the revision being imported.
  #
  # Returns undef when follow_parent is off, when no changed-path table is
  # available, when neither the tracked path nor any of its leading
  # directories was copied, or when no parent commit could be located.
  # Otherwise returns the result of make_log_entry.
  sub find_parent_branch {
      my ($self, $paths, $rev) = @_;
      return undef unless $self->follow_parent;
      unless (defined $paths) {
          # local() puts the previous handler back even when get_log dies,
          # so a failure here cannot leave skip_unknown_revs installed.
          local $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs;
          $self->ra->get_log([$self->path], $rev, $rev, 0, 1, 1,
                             sub { $paths = $_[0] });
      }
      return undef unless defined $paths;

      # look for a parent from another branch: walk from the tracked path
      # up towards the root until an entry with copy information is found.
      # @a_path_components collects the trailing components that were
      # peeled off on the way up.
      my @b_path_components = split m#/#, $self->path;
      my @a_path_components;
      my $i;
      while (@b_path_components) {
          $i = $paths->{'/'.join('/', @b_path_components)};
          last if $i && defined $i->{copyfrom_path};
          unshift(@a_path_components, pop(@b_path_components));
      }
      return undef unless defined $i && defined $i->{copyfrom_path};

      my $branch_from = $i->{copyfrom_path};
      if (@a_path_components) {
          # A parent directory was copied: re-append the peeled-off tail to
          # get the source of the tracked path itself.
          print STDERR "branch_from: $branch_from => ";
          $branch_from .= '/'.join('/', @a_path_components);
          print STDERR $branch_from, "\n";
      }
      my $r = $i->{copyfrom_rev};
      my $url = $self->ra->url;
      my $new_url = canonicalize_url( add_path_to_url( $url, $branch_from ) );
      print STDERR "Found possible branch point: ",
                   "$new_url => ", $self->full_url, ", $r\n"
          unless $::_q > 1;
      $branch_from =~ s#^/##;
      my $gs = $self->other_gs($new_url, $url,
                               $branch_from, $r, $self->{ref_id});
      my ($r0, $parent) = $gs->find_rev_before($r, 1);
      {
          # Make sure the source ref has been fetched up to $r, then look
          # for the parent again.
          my $base;
          if (!defined $r0 || !defined $parent) {
              ($base) = parse_revision_argument(0, $r);
          } else {
              if ($r0 < $r) {
                  $gs->ra->get_log([$gs->path], $r0 + 1, $r, 1,
                                   0, 1, sub { $base = $_[1] - 1 });
              }
          }
          if (defined $base && $base <= $r) {
              $gs->fetch($base, $r);
          }
          ($r0, $parent) = $gs->find_rev_before($r, 1);
      }
      if (defined $r0 && defined $parent) {
          print STDERR "Found branch parent: ($self->{ref_id}) $parent\n"
              unless $::_q > 1;
          my $ed;
          if ($self->ra->can_do_switch) {
              $self->assert_index_clean($parent);
              print STDERR "Following parent with do_switch\n"
                  unless $::_q > 1;
              # do_switch works with svn/trunk >= r22312, but that
              # is not included with SVN 1.4.3 (the latest version
              # at the moment), so we can't rely on it
              $self->{last_rev} = $r0;
              $self->{last_commit} = $parent;
              $ed = Git::SVN::Fetcher->new($self, $gs->path);
              $gs->ra->gs_do_switch($r0, $rev, $gs,
                                    $self->full_url, $ed)
                  or die "SVN connection failed somewhere...\n";
          } elsif ($self->ra->trees_match($new_url, $r0,
                                          $self->full_url, $rev)) {
              print STDERR "Trees match:\n",
                           " $new_url\@$r0\n",
                           " ${\$self->full_url}\@$rev\n",
                           "Following parent with no changes\n"
                  unless $::_q > 1;
              $self->tmp_index_do(sub {
                  command_noisy('read-tree', $parent);
              });
              $self->{last_commit} = $parent;
          } else {
              print STDERR "Following parent with do_update\n"
                  unless $::_q > 1;
              $ed = Git::SVN::Fetcher->new($self);
              $self->ra->gs_do_update($rev, $rev, $self, $ed)
                  or die "SVN connection failed somewhere...\n";
          }
          print STDERR "Successfully followed parent\n" unless $::_q > 1;
          return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
      }
      return undef;
  }
  # Fetch revision $rev for this ref and return its log entry.
  #
  # find_parent_branch is tried first; if it yields a log entry, that entry
  # is returned (with the current last commit added as an extra parent when
  # this ref already has one).  Otherwise the revision is pulled with
  # gs_do_update, starting from last_rev when the ref has history and from
  # $rev itself when it has none.
  sub do_fetch {
      my ($self, $paths, $rev) = @_;
      my ($editor, $from_rev, @parents);

      my $last_commit = $self->last_commit;
      if ($last_commit) {
          # we can have a branch that was deleted, then re-added
          # under the same name but copied from another path, in
          # which case we'll have multiple parents (we don't
          # want to break the original ref or lose copypath info):
          my $log_entry = $self->find_parent_branch($paths, $rev);
          if ($log_entry) {
              push @{$log_entry->{parents}}, $last_commit;
              return $log_entry;
          }
          $editor      = Git::SVN::Fetcher->new($self);
          $from_rev    = $self->{last_rev};
          $editor->{c} = $last_commit;
          @parents     = ($last_commit);
      } else {
          $from_rev = $rev;
          my $log_entry = $self->find_parent_branch($paths, $rev);
          return $log_entry if $log_entry;
          $editor = Git::SVN::Fetcher->new($self);
      }

      $self->ra->gs_do_update($from_rev, $rev, $self, $editor)
          or die "SVN connection failed somewhere...\n";
      return $self->make_log_entry($rev, \@parents, $editor,
                                   $from_rev, $self->path);
  }
  # Re-create, in the current directory, the empty directories recorded in
  # this ref's unhandled.log(.gz) files.
  #
  # The logs are replayed line by line: "+empty_dir" lines add a path,
  # "-empty_dir" lines remove it again, and when $r is defined replay of a
  # file stops at the first "r<N>" line with N greater than $r.  Whatever
  # paths survive are decoded, stripped of this ref's own path prefix, and
  # created unless something already exists at that location.
  sub mkemptydirs {
      my ($self, $r) = @_;

      # add/remove/collect a paths table
      #
      # Paths are kept as a tree of nodes (a hash of hashes).  A node has a
      # 'children' hash for the nodes below it and, when a recorded path
      # ends at that node, a 'path' entry holding the full path.
      #
      # Dropping a path is then one hash lookup per component followed by
      # deleting a single node together with everything under it, which is
      # far cheaper than grepping through one flat hash of paths.
      #
      # For a large (200K) number of empty_dir directives this reduces
      # scanning time to 3 seconds vs 10 minutes for grep+delete on a single
      # hash of paths.
      sub add_path {
          my ($paths_table, $path) = @_;
          my $node_ref;

          for my $component (split('/', $path)) {
              $paths_table->{$component} = { children => {} }
                  unless exists($paths_table->{$component});
              $node_ref    = $paths_table->{$component};
              $paths_table = $node_ref->{children};
          }

          $node_ref->{path} = $path;
      }

      sub remove_path {
          my ($paths_table, $path) = @_;
          my ($parent_table, $leaf_name);

          for my $component (split('/', $path)) {
              # Nothing recorded along this path: nothing to remove.
              return unless exists($paths_table->{$component});
              ($parent_table, $leaf_name) = ($paths_table, $component);
              $paths_table = $paths_table->{$component}->{children};
          }

          delete($parent_table->{$leaf_name});
      }

      sub collect_paths {
          my ($paths_table, $paths_ref) = @_;

          for my $node (values %$paths_table) {
              # Children first, then the node's own path (if it has one).
              collect_paths($node->{children}, $paths_ref);
              push(@$paths_ref, $node->{path}) if defined($node->{path});
          }
      }

      # Apply one log line to the table.  Returns 0 to stop the replay
      # (revision marker beyond $r) and 1 to carry on.
      sub scan {
          my ($r, $paths_table, $line) = @_;
          if (defined $r && $line =~ /^r(\d+)$/) {
              return 0 if $1 > $r;
          } elsif ($line =~ /^ \+empty_dir: (.+)$/) {
              add_path($paths_table, $1);
          } elsif ($line =~ /^ \-empty_dir: (.+)$/) {
              remove_path($paths_table, $1);
          }
          return 1; # continue
      }

      my @empty_dirs;
      my %paths_table;

      # Compressed log first, if there is one and we are able to read it.
      my $gz_file = "$self->{dir}/unhandled.log.gz";
      if (-f $gz_file) {
          if (can_compress()) {
              my $gz = Compress::Zlib::gzopen($gz_file, "rb") or
                  die "Unable to open $gz_file: $!\n";
              my $line;
              while ($gz->gzreadline($line) > 0) {
                  scan($r, \%paths_table, $line) or last;
              }
              $gz->gzclose;
          } else {
              warn "Compress::Zlib could not be found; ",
                   "empty directories in $gz_file will not be read\n";
          }
      }

      # Then the plain log; a missing/unreadable file is silently skipped.
      if (open my $fh, '<', "$self->{dir}/unhandled.log") {
          binmode $fh or croak "binmode: $!";
          while (<$fh>) {
              scan($r, \%paths_table, $_) or last;
          }
          close $fh;
      }

      collect_paths(\%paths_table, \@empty_dirs);

      # Prefix to remove: this ref's path, followed by "/" or end of string.
      my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
      for my $d (sort @empty_dirs) {
          $d = uri_decode($d);
          $d =~ s/$strip//;
          next unless length($d);
          next if -d $d;
          if (-e $d) {
              warn "$d exists but is not a directory\n";
              next;
          }
          print "creating empty directory: $d\n";
          mkpath([$d]);
      }
  }
  # Build the list of log lines describing what the editor $ed saw but git
  # cannot represent: empty directories, directory/file properties (except
  # those listed in %SKIP_PROP) and absent files/directories.
  #
  # Paths, property names and property values are passed through
  # uri_encode.  A warning is also emitted for every empty-directory and
  # absent entry.  Returns a reference to the array of lines.
  sub get_untracked {
      my ($self, $ed) = @_;
      my @out;

      my $empty = $ed->{empty};
      for my $dir (sort keys %$empty) {
          # A true value marks a directory that became empty, a false one a
          # directory that stopped being empty.
          my $act = $empty->{$dir} ? '+empty_dir' : '-empty_dir';
          push @out, " $act: " . uri_encode($dir);
          warn "W: $act: $dir\n";
      }

      for my $t (qw/dir_prop file_prop/) {
          my $props_of = $ed->{$t} or next;
          for my $path (sort keys %$props_of) {
              my $ppath = $path eq '' ? '.' : $path;
              for my $prop (sort keys %{$props_of->{$path}}) {
                  next if $SKIP_PROP{$prop};
                  my $v = $props_of->{$path}->{$prop};
                  my $t_ppath_prop = "$t: " .
                                     uri_encode($ppath) . ' ' .
                                     uri_encode($prop);
                  # A defined value means the property was set, undef that
                  # it was removed.
                  unless (defined $v) {
                      push @out, " -$t_ppath_prop";
                      next;
                  }
                  push @out, " +$t_ppath_prop " . uri_encode($v);
              }
          }
      }

      for my $t (qw/absent_file absent_directory/) {
          my $absent_under = $ed->{$t} or next;
          for my $parent (sort keys %$absent_under) {
              for my $path (sort @{$absent_under->{$parent}}) {
                  push @out, " $t: " . uri_encode("$parent/$path");
                  warn "W: $t: $parent/$path ",
                       "Insufficient permissions?\n";
              }
          }
      }

      return \@out;
  }
  # parse_svn_date(DATE)
  # --------------------
  # Given a date (in UTC) from Subversion, return a string in the format
  # "<TZ Offset> <local date/time>" that Git will use.
  #
  # By default the parsed date will be in UTC; if $Git::SVN::_localtime
  # is true we'll convert it to the local timezone instead.
  #
  # A false DATE (undef, empty string) yields '+0000 1970-01-01 00:00:00'.
  # Croaks when DATE does not look like "YYYY-MM-DDTHH:MM:SS.fffZ".
  #
  # $ENV{TZ} is changed temporarily while converting to local time; it is
  # back to its previous state (including "not set") when this routine
  # returns or dies.
  sub parse_svn_date {
      my $date = shift || return '+0000 1970-01-01 00:00:00';
      my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
                                          (\d\d?)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
          croak "Unable to parse date: $date\n";

      return "+0000 $Y-$m-$d $H:$M:$S" unless $Git::SVN::_localtime;

      # Translate the Subversion datetime to an epoch time.
      # Begin by switching ourselves to $date's timezone, UTC.
      # local() undoes every change made to TZ below when we leave this
      # routine, even if timelocal() croaks on an out-of-range field.
      local $ENV{TZ} = 'UTC';
      my $epoch_in_UTC =
          Time::Local::timelocal($S, $M, $H, $d, $m - 1, $Y);

      # Determine our local timezone (including DST) at the
      # time of $epoch_in_UTC.  $Git::SVN::Log::TZ stored the
      # value of TZ, if any, at the time we were run.
      if (defined $Git::SVN::Log::TZ) {
          $ENV{TZ} = $Git::SVN::Log::TZ;
      } else {
          delete $ENV{TZ};
      }
      my $our_TZ = get_tz_offset($epoch_in_UTC);

      # This converts $epoch_in_UTC into our local timezone.
      my ($sec, $min, $hour, $mday, $mon, $year,
          $wday, $yday, $isdst) = localtime($epoch_in_UTC);

      return sprintf('%s %04d-%02d-%02d %02d:%02d:%02d',
                     $our_TZ, $year + 1900, $mon + 1,
                     $mday, $hour, $min, $sec);
  }
  # Return the Git::SVN object for $new_url, creating one when
  # find_by_url does not know it yet.
  #
  # A new object gets a ref id derived from $old_ref_id: any existing
  # "@<rev>" suffix (with trailing dashes) is replaced by "@$r", and dashes
  # are appended until the name is free.  When $new_url lies under $url the
  # object shares $self's repo id and uses the remainder as its path;
  # otherwise it uses $new_url as-is with the ref id (as it was before the
  # init loop) as repo id.
  sub other_gs {
      my ($self, $new_url, $url,
          $branch_from, $r, $old_ref_id) = @_;

      my $gs = Git::SVN->find_by_url($new_url, $url, $branch_from);
      return $gs if $gs;

      my $ref_id = $old_ref_id;
      $ref_id =~ s/\@\d+-*$//;
      $ref_id .= "\@$r";
      # just grow a tail if we're not unique enough :x
      while (find_ref($ref_id)) {
          $ref_id .= '-';
      }

      my ($u, $p, $repo_id) = ($new_url, '', $ref_id);
      if ($u =~ s#^\Q$url\E(/|$)##) {
          ($p, $u, $repo_id) = ($u, $url, $self->{repo_id});
      }

      while (1) {
          # It is possible to tag two different subdirectories at
          # the same revision.  If the url for an existing ref
          # does not match, we must either find a ref with a
          # matching url or create a new ref by growing a tail.
          $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1);
          my (undef, $max_commit) = $gs->rev_map_max(1);
          last unless $max_commit;
          my ($recorded_url) = ::cmt_metadata($max_commit);
          last if $recorded_url eq $gs->metadata_url;
          $ref_id .= '-';
      }
      print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1;

      return $gs;
  }
  # Run the program named in $::_authors_prog with the SVN author name as
  # its argument and parse its output as "Name <email>".
  #
  # The author is shell-quoted via "rev-parse --sq-quote" before being put
  # on the command line.  Returns [name, email]; dies when the program
  # exits non-zero or prints something that does not match the format.
  sub call_authors_prog {
      my ($orig_author) = @_;
      $orig_author = command_oneline('rev-parse', '--sq-quote', $orig_author);

      my $author = `$::_authors_prog $orig_author`;
      die "$::_authors_prog failed with exit code $?\n" if $? != 0;

      unless ($author =~ /^\s*(.+?)\s*<(.*)>\s*$/) {
          die "Author: $orig_author: $::_authors_prog returned "
              . "invalid author format: $author\n";
      }
      my ($name, $email) = ($1, $2);
      return [$name, $email];
  }
  # Normalize an SVN author name and make sure it can be mapped.
  #
  # Leading/trailing whitespace is removed and a missing or empty name
  # becomes '(no author)'.  If the name has no defined entry in %::users it
  # is resolved through the authors program when one is configured (and the
  # result stored in %::users); failing that, it is a fatal error when an
  # authors file is in use.  Returns the normalized name.
  sub check_author {
      my ($author) = @_;
      if (defined $author) {
          $author =~ s/^\s+//g;
          $author =~ s/\s+$//g;
      }
      unless (defined($author) && length($author)) {
          $author = '(no author)';
      }

      return $author if defined $::users{$author};

      if (defined $::_authors_prog) {
          $::users{$author} = call_authors_prog($author);
      } elsif (defined $::_authors) {
          die "Author: $author not defined in $::_authors file\n";
      }
      return $author;
  }
  # Add merge parents derived from an svk:merge property to @$parents.
  #
  # $tickets holds one "uuid:path:rev" ticket per line.  Tickets whose uuid
  # equals this repository's are mapped to commits through the rev map of
  # the corresponding ref; a commit is appended to @$parents when rev-list
  # shows it has at least one commit not already reachable from the current
  # parents.
  sub find_extra_svk_parents {
      my ($self, $tickets, $parents) = @_;
      # aha!  svk:merge property changed...
      my @known_parents;
      for my $ticket (split "\n", $tickets) {
          my ($uuid, $path, $rev) = split /:/, $ticket;
          next unless $uuid eq $self->ra_uuid;

          my $repos_root = $self->url;
          (my $branch_from = $path) =~ s{^/}{};
          my $gs = $self->other_gs(add_path_to_url( $repos_root, $branch_from ),
                                   $repos_root,
                                   $branch_from,
                                   $rev,
                                   $self->{ref_id});
          my $commit = $gs->rev_map_get($rev, $uuid);
          # wahey!  we found it, but it might be
          # an old one (!)
          push @known_parents, [ $rev, $commit ] if $commit;
      }

      # Ordering matters; highest-numbered commit merge tickets
      # first, as they may account for later merge ticket additions
      # or changes.
      @known_parents = map  { $_->[1] }
                       sort { $b->[0] <=> $a->[0] }
                       @known_parents;

      for my $parent (@known_parents) {
          my @cmd = ('rev-list', $parent, map { "^$_" } @$parents);
          my ($msg_fh, $ctx) = command_output_pipe(@cmd);
          # One line of output is enough to know there is something new.
          my $new;
          while (<$msg_fh>) {
              $new = 1;
              last;
          }
          command_close_pipe($msg_fh, $ctx);
          next unless $new;

          print STDERR
              "Found merge parent (svk:merge ticket): $parent\n";
          push @$parents, $parent;
      }
  }
  # Translate one svn:mergeinfo source ($source with its comma-separated
  # revision ranges $revs) into git terms.
  #
  # Arguments are ($uuid, $url, $source, $revs); $uuid is accepted but not
  # used in the body.  Returns nothing (after a warning) when no rev map is
  # known for "$url$source".  Otherwise returns the commit for the highest
  # usable range top, followed by one rev-list range per usable range.
  # Ranges ending in "*" and ranges that cannot be mapped to commits are
  # skipped with a warning.
  sub lookup_svn_merge {
      my ($uuid, $url, $source, $revs) = @_;

      (my $path = $source) =~ s{^/}{};
      my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
      unless ($gs) {
          warn "Couldn't find revmap for $url$source\n";
          return;
      }

      my ($tip, $tip_commit);
      my @merged_commit_ranges;
      # find the tip
      for my $range (split ",", $revs) {
          if ($range =~ /[*]$/) {
              warn "W: Ignoring partial merge in svn:mergeinfo "
                  ."dirprop: $source:$range\n";
              next;
          }

          # A single revision "N" is treated as the range N-N.
          my ($bottom, $top) = split "-", $range;
          $top ||= $bottom;

          my $bottom_commit = $gs->find_rev_after( $bottom, 1, $top );
          my $top_commit = $gs->find_rev_before( $top, 1, $bottom );
          unless ($top_commit and $bottom_commit) {
              warn "W: unknown path/rev in svn:mergeinfo "
                  ."dirprop: $source:$range\n";
              next;
          }

          # Only use the "^.." form when the bottom commit has a parent.
          my $has_parent = scalar(command('rev-parse', "$bottom_commit^@"));
          push @merged_commit_ranges,
               $has_parent ? "$bottom_commit^..$top_commit"
                           : "$top_commit";

          if ( !defined $tip or $top > $tip ) {
              ($tip, $tip_commit) = ($top, $top_commit);
          }
      }
      return ($tip_commit, @merged_commit_ranges);
  }
  # Run "rev-list" with the given arguments and return its output lines,
  # with line endings chomped.
  sub _rev_list {
      my @rev_list_cmd = ("rev-list", @_);
      my ($msg_fh, $ctx) = command_output_pipe(@rev_list_cmd);

      my @rv;
      while ( <$msg_fh> ) {
          chomp;
          push @rv, $_;
      }
      command_close_pipe($msg_fh, $ctx);

      return @rv;
  }
  # Work out which non-merge commits between $base and $tip are NOT covered
  # by the given merged @ranges.
  #
  # Starts from the non-merge commits reachable from $tip but not from
  # $base or any of @$parents, removes everything listed by each range and
  # everything has_no_changes() reports as true.  Returns a two-element
  # list: the number of commits left and one of them (undef when none).
  sub check_cherry_pick2 {
      my ($base, $tip, $parents, @ranges) = @_;

      my %commits = map { $_ => 1 }
          _rev_list("--no-merges", $tip, "--not", $base, @$parents, "--");

      for my $range ( @ranges ) {
          delete @commits{_rev_list($range, "--")};
      }
      for my $commit (keys %commits) {
          delete $commits{$commit} if has_no_changes($commit);
      }

      my @remaining = (keys %commits);
      return (scalar @remaining, $remaining[0]);
  }
  # Classify $commit by its parents and tree.
  #
  # Returns 1 when rev-list reports no parent for the commit, 0 when it
  # reports more than one parent, and otherwise whether the commit's tree
  # is identical to the tree of its (only) parent.
  #
  # NOTE(review): answering "true" for a parentless commit and "false" for
  # a merge looks inverted for a function with this name.  The behaviour is
  # deliberately left untouched here -- confirm the intent before changing
  # it.
  sub has_no_changes {
      my ($commit) = @_;

      # First field is the commit itself, the remaining ones its parents.
      my $parents_line = command_oneline(
          qw(rev-list --parents -1 -m), $commit);
      my @revs = split / /, $parents_line;

      return 1 if (@revs < 2);    # no parent listed
      return 0 if (@revs > 2);    # more than one parent listed

      my $own_tree    = command_oneline("rev-parse", "$commit^{tree}");
      my $parent_tree = command_oneline("rev-parse", "$commit~1^{tree}");
      return ($own_tree eq $parent_tree);
  }
  # Tie %$hash to an on-disk store located at $path plus a backend-specific
  # suffix, so that memoized results survive between runs.
  #
  # The backend is chosen once per process and remembered in $memo_backend:
  # Git::SVN::Memoize::YAML ("$path.yaml") when it can be loaded, otherwise
  # Memoize::Storable ("$path.db").
  sub tie_for_persistent_memoization {
      my ($hash, $path) = @_;

      unless ($memo_backend) {
          my $have_yaml = eval { require Git::SVN::Memoize::YAML; 1 };
          if ($have_yaml) {
              $memo_backend = 1;
          } else {
              require Memoize::Storable;
              $memo_backend = -1;
          }
      }

      if ($memo_backend > 0) {
          tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml";
      } else {
          # first verify that any existing file can actually be loaded
          # (it may have been saved by an incompatible version); a file
          # that cannot be read is removed so a fresh one gets created.
          my $db = "$path.db";
          if (-e $db) {
              use Storable qw(retrieve);

              my $loadable = eval { retrieve($db); 1 };
              unless ($loadable) {
                  unlink $db or die "unlink $db failed: $!";
              }
          }
          tie %$hash => 'Memoize::Storable', $db, 'nstore';
      }
  }
  # The GIT_DIR environment variable is not always set until after the command
  # line arguments are processed, so we can't memoize in a BEGIN block.
  # Memoization is therefore switched on and off explicitly through the
  # functions below, which share the $memoized flag.
  {
      my $memoized = 0;

      # Memoize lookup_svn_merge, check_cherry_pick2 and has_no_changes with
      # persistent caches under "<svn_dir>/.caches/".  Each function is
      # cached for one calling context only; calling it in the other context
      # is configured as a fault.  Does nothing when already memoized.
      sub memoize_svn_mergeinfo_functions {
          return if $memoized;
          $memoized = 1;

          my $cache_path = svn_dir() . '/.caches/';
          mkpath([$cache_path]) unless -d $cache_path;

          # function name => (cached context, faulting context)
          my @plan = (
              [ 'lookup_svn_merge',   'LIST_CACHE',   'SCALAR_CACHE' ],
              [ 'check_cherry_pick2', 'LIST_CACHE',   'SCALAR_CACHE' ],
              [ 'has_no_changes',     'SCALAR_CACHE', 'LIST_CACHE'   ],
          );
          for my $entry (@plan) {
              my ($func, $cached_ctx, $faulting_ctx) = @$entry;

              # A fresh hash per function; Memoize keeps the reference.
              my %cache;
              tie_for_persistent_memoization(\%cache, "$cache_path/$func");
              memoize($func,
                      $cached_ctx   => ['HASH' => \%cache],
                      $faulting_ctx => 'FAULT');
          }
      }

      # Undo memoize_svn_mergeinfo_functions.  Does nothing when the
      # functions are not currently memoized.
      sub unmemoize_svn_mergeinfo_functions {
          return if not $memoized;
          $memoized = 0;

          for my $func ('lookup_svn_merge',
                        'check_cherry_pick2',
                        'has_no_changes') {
              Memoize::unmemoize($func);
          }
      }

      # Delete the on-disk cache files of both backends (.yaml and .db),
      # including those of the older check_cherry_pick cache.  Must not be
      # called while the functions are memoized.
      sub clear_memoized_mergeinfo_caches {
          die "Only call this method in non-memoized context" if ($memoized);

          my $cache_path = svn_dir() . '/.caches/';
          return unless -d $cache_path;

          my @cache_files = map { "$cache_path/$_" } (
              'lookup_svn_merge',
              'check_cherry_pick', # old
              'check_cherry_pick2',
              'has_no_changes',
          );
          for my $cache_file (@cache_files) {
              for my $suffix (qw(yaml db)) {
                  my $file = "$cache_file.$suffix";
                  next unless -e $file;
                  unlink($file) or die "unlink($file) failed: $!\n";
              }
          }
      }

      Memoize::memoize 'Git::SVN::repos_root';
  }

  END {
      # Force cache writeout explicitly instead of waiting for
      # global destruction to avoid segfault in Storable:
      # http://rt.cpan.org/Public/Bug/Display.html?id=36087
      unmemoize_svn_mergeinfo_functions();
  }
  # From the candidate @commits, pick those that are not already reachable
  # from @$parents.
  #
  # Repeatedly asks rev-list for one commit reachable from the remaining
  # candidates but not from @$parents, moves that commit from the candidate
  # list to the result, and stops when rev-list has nothing more to offer
  # or no candidates are left.  Returns the list of picked commits (nothing
  # when called without candidates).  Dies if rev-list names a commit that
  # is not among the candidates.
  sub parents_exclude {
      my ($parents, @commits) = @_;
      return unless @commits;

      my @excluded;
      my $excluded;
      while (1) {
          my @cmd = ('rev-list', "-1", @commits, "--not", @$parents );
          $excluded = command_oneline(@cmd);
          if ( $excluded ) {
              my @matched = grep { $_ eq $excluded } @commits;
              die "saw commit '$excluded' in rev-list output, "
                  ."but we didn't ask for that commit (wanted: @commits --not @$parents)"
                  unless @matched;
              push @excluded, @matched;
              @commits = grep { $_ ne $excluded } @commits;
          }
          last unless ($excluded and @commits);
      }
      return @excluded;
  }
  1622. # Compute what's new in svn:mergeinfo.
  1623. sub mergeinfo_changes {
  1624. my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
  1625. my %minfo =…

Large files are truncated, but you can click here to view the full file