PageRenderTime 52ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/perl/Git/SVN/Fetcher.pm

https://bitbucket.org/mirror/git
Perl | 622 lines | 505 code | 77 blank | 40 comment | 80 complexity | 5cc2e5b8e1b49b140ece924f09010ca1 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, BSD-2-Clause
  package Git::SVN::Fetcher;

  use strict;
  use warnings;

  # Package globals read and written by the subs in this file.
  our (
      @ISA,
      $_ignore_regex,
      $_include_regex,
      $_preserve_empty_dirs,
      $_placeholder_filename,
      @deleted_gpath,
      %added_placeholder,
      $repo_id,
  );

  use SVN::Delta;
  use Carp qw(croak);
  use File::Basename qw(dirname);
  use Git qw(
      command command_oneline command_noisy command_output_pipe
      command_input_pipe command_close_pipe
      command_bidi_pipe command_close_bidi_pipe
      get_record
  );

  # This class is an SVN::Delta::Editor subclass.
  BEGIN {
      @ISA = ('SVN::Delta::Editor');
  }
  17. # file baton members: path, mode_a, mode_b, pool, fh, blob, base
  # Construct a fetcher (an SVN::Delta::Editor blessed into $class).
  #
  # Arguments:
  #   $class       - class to bless the editor into
  #   $git_svn     - object providing {repo_id}, optionally {last_commit},
  #                  and a tmp_index_do() method
  #   $switch_path - passed through to _mark_empty_symlinks()
  #
  # Side effects: sets the package global $repo_id and, depending on the
  # [svn-remote "..."] configuration, $_preserve_empty_dirs,
  # $_placeholder_filename and %added_placeholder.
  #
  # Returns the new editor object.
  sub new {
      my ($class, $git_svn, $switch_path) = @_;
      my $self = SVN::Delta::Editor->new;
      bless $self, $class;
      if (exists $git_svn->{last_commit}) {
          $self->{c} = $git_svn->{last_commit};
          $self->{empty_symlinks} =
              _mark_empty_symlinks($git_svn, $switch_path);
      }

      # some options are read globally, but can be overridden locally
      # per [svn-remote "..."] section.  Command-line options will *NOT*
      # override options set in an [svn-remote "..."] section
      $repo_id = $git_svn->{repo_id};
      my $section = "svn-remote.$repo_id";

      # Each lookup is wrapped in eval so that a failing lookup simply
      # leaves the value undefined.
      $self->{ignore_regex} = eval {
          command_oneline('config', '--get', "$section.ignore-paths");
      };
      $self->{include_regex} = eval {
          command_oneline('config', '--get', "$section.include-paths");
      };

      my $preserve = eval {
          command_oneline('config', '--get', '--bool',
                          "$section.preserve-empty-dirs");
      };
      if ($preserve && $preserve eq 'true') {
          $_preserve_empty_dirs = 1;
          my $placeholder = eval {
              command_oneline('config', '--get',
                              "$section.placeholder-filename");
          };
          # Only override the global placeholder name when this remote
          # actually configures one; otherwise an unset key would replace
          # the existing value with undef.
          $_placeholder_filename = $placeholder if defined $placeholder;
      }

      # Load the list of placeholder files added during previous invocations.
      my $placeholder_key = "$section.added-placeholder";
      my $have_placeholders = eval {
          command_oneline('config', '--get-all', $placeholder_key);
      };
      if ($_preserve_empty_dirs && $have_placeholders) {
          # command() prints errors to stderr, so we only call it if
          # command_oneline() succeeded.
          for my $placeholder (command('config', '--get-all',
                                       $placeholder_key)) {
              $added_placeholder{ dirname($placeholder) } = $placeholder;
          }
      }

      $self->{empty}       = {};
      $self->{dir_prop}    = {};
      $self->{file_prop}   = {};
      $self->{absent_dir}  = {};
      $self->{absent_file} = {};
      require Git::IndexInfo;
      $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
      $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
      return $self;
  }
  # this uses the Ra object, so it must be called before do_{switch,update},
  # not inside them (when the Git::SVN::Fetcher object is passed) to
  # do_{switch,update}
  #
  # Arguments:
  #   $git_svn     - object providing last_rev_commit(), path() and ra()
  #   $switch_path - optional path prefix used instead of $git_svn->path
  #
  # Returns a hashref keyed by the paths (as listed by `git ls-tree -r` on
  # the last fetched commit) of empty mode-100644 blobs whose file in SVN
  # carries the svn:special property.  Returns an empty hashref when
  # svn.brokenSymlinkWorkaround is unset or false, or when there is no
  # previous revision/commit.
  sub _mark_empty_symlinks {
      my ($git_svn, $switch_path) = @_;

      # An unset (undef) and an explicitly false setting both disable the scan.
      return {} unless Git::config_bool('svn.brokenSymlinkWorkaround');

      my ($rev, $cmt) = $git_svn->last_rev_commit;
      return {} unless ($rev && $cmt);

      # allow the warning to be printed for each revision we fetch to
      # ensure the user sees it.  The user can also disable the workaround
      # on the repository even while git svn is running and the next
      # revision fetched will skip this expensive function.
      my $printed_warning;
      my %ret;
      chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
      my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
      my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
      $pfx .= '/' if length($pfx);

      # A lexical record variable is used so the caller's $_ is left alone.
      while (defined(my $path = get_record($ls, "\0"))) {
          # Keep only empty regular files; what is left after the
          # substitution is the path itself.
          $path =~ s/\A100644 blob $empty_blob\t//o or next;
          unless ($printed_warning) {
              print STDERR "Scanning for empty symlinks, ",
                           "this may take a while if you have ",
                           "many empty files\n",
                           "You may disable this with `",
                           "git config svn.brokenSymlinkWorkaround ",
                           "false'.\n",
                           "This may be done in a different ",
                           "terminal without restarting ",
                           "git svn\n";
              $printed_warning = 1;
          }
          my (undef, $props) =
              $git_svn->ra->get_file($pfx.$path, $rev, undef);
          $ret{$path} = 1 if $props->{'svn:special'};
      }
      command_close_pipe($ls, $ctx);
      return \%ret;
  }
  # Tells whether $path has a path component that is exactly ".git"
  # (i.e. the path is, or lies inside, a ".git" directory).
  sub in_dot_git {
      my ($path) = @_;
      return $path =~ m{(?:^|/)\.git(?:/|$)};
  }
  # Decide whether $path should be skipped.
  # return value: 0 -- don't ignore, 1 -- ignore
  #
  # Checks are applied in this order:
  #   1. anything inside a ".git" directory is ignored;
  #   2. a match against the per-remote ignore regex ignores the path;
  #   3. a match against the per-remote or global include regex keeps it;
  #   4. if any include regex exists (and nothing matched), it is ignored;
  #   5. otherwise the global ignore regex, if any, decides.
  sub is_path_ignored {
      my ($self, $path) = @_;

      if (in_dot_git($path)) {
          return 1;
      }

      my $remote_ignore  = $self->{ignore_regex};
      my $remote_include = $self->{include_regex};

      if (defined($remote_ignore) && $path =~ m!$remote_ignore!) {
          return 1;
      }
      if (defined($remote_include) && $path =~ m!$remote_include!) {
          return 0;
      }
      if (defined($_include_regex) && $path =~ m!$_include_regex!) {
          return 0;
      }
      if (defined($remote_include)) {
          return 1;
      }
      if (defined($_include_regex)) {
          return 1;
      }
      if (!defined($_ignore_regex)) {
          return 0;
      }
      if ($path =~ m!$_ignore_regex!o) {
          return 1;
      }
      return 0;
  }
  # Remember a leading path prefix that git_path() must strip.
  # A zero-length $path leaves {path_strip} untouched.
  sub set_path_strip {
      my ($self, $path) = @_;
      if (length $path) {
          # Matches $path literally at the start, followed by "/" or the end.
          $self->{path_strip} = qr/^\Q$path\E(\/|$)/;
      }
  }
  # Editor callback: the baton for the root directory has an empty path.
  sub open_root {
      my %baton = (path => '');
      return \%baton;
  }
  # Editor callback: the baton for an existing directory records its path.
  # The parent baton and revision arguments are accepted but not used.
  sub open_directory {
      my ($self, $path, $pb, $rev) = @_;
      my %baton = (path => $path);
      return \%baton;
  }
  # Translate an SVN path into the path used on the git side.
  #
  # When {pathnameencoding} is set, $path is converted in place from UTF-8
  # to that encoding.  When {path_strip} is set, the matching prefix is
  # removed; it is fatal if the pattern does not match.
  #
  # Returns the translated path.
  sub git_path {
      my ($self, $path) = @_;

      my $encoding = $self->{pathnameencoding};
      if ($encoding) {
          require Encode;
          Encode::from_to($path, 'UTF-8', $encoding);
      }

      my $strip = $self->{path_strip};
      if ($strip) {
          unless ($path =~ s!$strip!!) {
              die "Failed to strip path '$path' ($self->{path_strip})\n";
          }
      }

      return $path;
  }
  # Editor callback: remove $path (a file or a whole directory) from the
  # index.
  #
  # Nothing is done for ignored paths or when the git path is empty.
  # Otherwise each removed entry is reported as "\tD\t<path>" unless $::_q
  # is set, $self->{empty}{$path} is set to 0, and the git path is appended
  # to @deleted_gpath unless %added_placeholder has an entry for
  # dirname($path).
  #
  # Always returns undef.
  sub delete_entry {
      my ($self, $path, $rev, $pb) = @_;
      return undef if $self->is_path_ignored($path);
      my $gpath = $self->git_path($path);
      return undef if ($gpath eq '');

      # remove entire directories.
      my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
                       =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
      if ($tree) {
          my ($ls, $ctx) = command_output_pipe(
              qw/ls-tree -r --name-only -z/, $tree);
          # A lexical is used here rather than assigning to the global $_:
          # this sub is also called as a plain function from add_file and
          # add_directory, and must not disturb the caller's $_.
          while (defined(my $name = get_record($ls, "\0"))) {
              my $rmpath = "$gpath/$name";
              $self->{gii}->remove($rmpath);
              print "\tD\t$rmpath\n" unless $::_q;
          }
          print "\tD\t$gpath/\n" unless $::_q;
          command_close_pipe($ls, $ctx);
      } else {
          $self->{gii}->remove($gpath);
          print "\tD\t$gpath\n" unless $::_q;
      }

      # Don't add to @deleted_gpath if we're deleting a placeholder file.
      push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
      $self->{empty}->{$path} = 0;
      return undef;
  }
  # Editor callback: build the file baton for an existing file.
  #
  # For a path that is not ignored, the file's mode and blob id are looked
  # up in commit $self->{c}; it is fatal if the file is not found there.
  # A 100644 entry recorded in {empty_symlinks} is treated as mode 120000.
  # For an ignored path, mode and blob stay undefined.
  #
  # Returns the baton: path, mode_a, mode_b, blob, pool, action ('M').
  sub open_file {
      my ($self, $path, $pb, $rev) = @_;
      my ($mode, $blob);

      unless ($self->is_path_ignored($path)) {
          my $gpath = $self->git_path($path);
          my $listing = command('ls-tree', '-z', $self->{c}, "./$gpath");
          ($mode, $blob) =
              ($listing =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
          if (!defined $mode || !defined $blob) {
              die "$path was not found in commit $self->{c} (r$rev)\n";
          }
          if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
              $mode = '120000';
          }
      }

      return {
          path   => $path,
          mode_a => $mode,
          mode_b => $mode,
          blob   => $blob,
          pool   => SVN::Pool->new,
          action => 'M',
      };
  }
  # Editor callback: build the file baton for a newly added file.
  #
  # For a path that is not ignored, the containing directory is dropped
  # from {empty}, the mode starts as 100644, and any placeholder recorded
  # for that directory in %added_placeholder is forgotten (and deleted via
  # delete_entry unless it is this very path).  For an ignored path the
  # mode stays undefined.
  #
  # Returns the baton: path, mode_a, mode_b, pool, action ('A').
  sub add_file {
      my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
      my $mode;

      unless ($self->is_path_ignored($path)) {
          # First capture: everything before the last path component.
          my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
          delete $self->{empty}->{$dir};
          $mode = '100644';

          my $placeholder = $added_placeholder{$dir};
          if ($placeholder) {
              # Remove our placeholder file, if we created one.
              if ($path ne $placeholder) {
                  delete_entry($self, $placeholder);
              }
              delete $added_placeholder{$dir};
          }
      }

      return {
          path   => $path,
          mode_a => $mode,
          mode_b => $mode,
          pool   => SVN::Pool->new,
          action => 'A',
      };
  }
  # Editor callback: register a newly added directory.
  #
  # For a path that is not ignored:
  #   - when its git path is empty, every file listed in commit
  #     $self->{c} is removed from the index and reported as "\tD\t<file>"
  #     unless $::_q is set;
  #   - the parent directory is dropped from {empty} and $path is recorded
  #     there with value 1;
  #   - a placeholder recorded for the parent directory is deleted via
  #     delete_entry and forgotten.
  #
  # Returns the directory baton { path => $path }.
  sub add_directory {
      my ($self, $path, $cp_path, $cp_rev) = @_;

      unless ($self->is_path_ignored($path)) {
          my $gpath = $self->git_path($path);
          if ($gpath eq '') {
              my ($ls, $ctx) = command_output_pipe(
                  qw/ls-tree -r --name-only -z/, $self->{c});
              # A lexical record variable keeps the caller's $_ intact.
              while (defined(my $name = get_record($ls, "\0"))) {
                  $self->{gii}->remove($name);
                  print "\tD\t$name\n" unless $::_q;
                  # Note: this records $gpath (the empty string in this
                  # branch), not the name of the removed file.
                  push @deleted_gpath, $gpath;
              }
              command_close_pipe($ls, $ctx);
              $self->{empty}->{$path} = 0;
          }

          # First capture: everything before the last path component.
          my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
          delete $self->{empty}->{$dir};
          $self->{empty}->{$path} = 1;

          if ($added_placeholder{$dir}) {
              # Remove our placeholder file, if we created one.
              delete_entry($self, $added_placeholder{$dir});
              delete $added_placeholder{$dir};
          }
      }

      return { path => $path };
  }
  # Editor callback: record a directory property change in
  # $self->{dir_prop}{<dir path>}{<prop>}.  Ignored paths are skipped.
  # Always returns undef.
  sub change_dir_prop {
      my ($self, $db, $prop, $value) = @_;
      my $dir = $db->{path};
      return undef if $self->is_path_ignored($dir);

      my $props = ($self->{dir_prop}->{$dir} ||= {});
      $props->{$prop} = $value;
      return undef;
  }
  # Editor callback: note that directory $path was reported as absent.
  # The path is appended to the list kept in
  # $self->{absent_dir}{<parent baton path>}.  Ignored paths are skipped.
  # Always returns undef.
  sub absent_directory {
      my ($self, $path, $pb) = @_;
      return undef if $self->is_path_ignored($path);

      my $children = ($self->{absent_dir}->{$pb->{path}} ||= []);
      push @$children, $path;
      return undef;
  }
  # Editor callback: note that file $path was reported as absent.
  # The path is appended to the list kept in
  # $self->{absent_file}{<parent baton path>}.  Ignored paths are skipped.
  # Always returns undef.
  sub absent_file {
      my ($self, $path, $pb) = @_;
      return undef if $self->is_path_ignored($path);

      my $children = ($self->{absent_file}->{$pb->{path}} ||= []);
      push @$children, $path;
      return undef;
  }
  # Editor callback: apply a file property change to the file baton $fb.
  #
  #   svn:executable - switches mode_b between 100755 (value defined) and
  #                    100644 (value undefined), unless mode_b is 120000
  #   svn:special    - sets mode_b to 120000 (value defined) or 100644
  #   anything else  - stored in $self->{file_prop}{<path>}{<prop>}
  #
  # Ignored paths are skipped.  Always returns undef.
  sub change_file_prop {
      my ($self, $fb, $prop, $value) = @_;
      my $file = $fb->{path};
      return undef if $self->is_path_ignored($file);

      if ($prop eq 'svn:executable') {
          unless ($fb->{mode_b} == 120000) {
              $fb->{mode_b} = defined $value ? 100755 : 100644;
          }
      }
      elsif ($prop eq 'svn:special') {
          $fb->{mode_b} = defined $value ? 120000 : 100644;
      }
      else {
          my $props = ($self->{file_prop}->{$file} ||= {});
          $props->{$prop} = $value;
      }
      return undef;
  }
  # Editor callback: prepare the temp files a text delta is applied to.
  #
  # Arguments:
  #   $fb  - file baton (reads path, blob, mode_a, pool; sets fh and base)
  #   $exp - expected MD5 of the base content, or undef to skip the check
  #
  # Returns undef for ignored paths, otherwise an array ref holding whatever
  # SVN::TxDelta::apply() returned.  Dies on a read failure or on a base
  # checksum mismatch that a retry without the 'link ' prefix cannot fix.
  sub apply_textdelta {
      my ($self, $fb, $exp) = @_;
      return undef if $self->is_path_ignored($fb->{path});

      # Pick the first suffix whose svn_delta temp file is not locked.
      my $suffix = 0;
      while ($::_repository->temp_is_locked("svn_delta_${$}_$suffix")) {
          ++$suffix;
      }
      my $fh = $::_repository->temp_acquire("svn_delta_${$}_$suffix");

      # $fh gets auto-closed() by SVN::TxDelta::apply(),
      # (but $base does not,) so dup() it for reading in close_file
      open my $dup, '<&', $fh or croak $!;
      my $base = $::_repository->temp_acquire("git_blob_${$}_$suffix");

      # close_file may call temp_acquire on 'svn_hash', but because of the
      # call chain, if the temp_acquire call from close_file ends up being the
      # call that first creates the 'svn_hash' temp file, then the FileHandle
      # that's created as a result will end up in an SVN::Pool that we clear
      # in SVN::Ra::gs_fetch_loop_common.  Avoid that by making sure the
      # 'svn_hash' FileHandle is already created before close_file is called.
      my $hash_fh = $::_repository->temp_acquire('svn_hash');
      $::_repository->temp_release($hash_fh, 1);

      if ($fb->{blob}) {
          my $base_is_link = 0;
          if ($fb->{mode_a} eq '120000' &&
              !$self->{empty_symlinks}->{$fb->{path}}) {
              print $base 'link ' or die "print $!\n";
              $base_is_link = 1;
          }

          # Load the blob into $base.  On a checksum mismatch with a
          # 'link '-prefixed base, reset $base and try once more without
          # the prefix.
          while (1) {
              my $size = $::_repository->cat_blob($fb->{blob}, $base);
              die "Failed to read object $fb->{blob}" if ($size < 0);

              last unless defined $exp;
              seek $base, 0, 0 or croak $!;
              my $got = ::md5sum($base);
              last if $got eq $exp;

              my $err = "Checksum mismatch: ".
                        "$fb->{path} $fb->{blob}\n" .
                        "expected: $exp\n" .
                        " got: $got\n";
              die $err unless $base_is_link;

              warn $err,
                   "Retrying... (possibly ",
                   "a bad symlink from SVN)\n";
              $::_repository->temp_reset($base);
              $base_is_link = 0;
          }
      }

      seek $base, 0, 0 or croak $!;
      $fb->{fh}   = $fh;
      $fb->{base} = $base;
      return [
          SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool})
      ];
  }
  # Editor callback: finish a file and record it in the index.
  #
  # Arguments:
  #   $fb  - file baton (reads path, fh, base, blob, mode_b, pool, action)
  #   $exp - expected MD5 of the resulting content, or undef to skip it
  #
  # When the baton has a delta result in {fh}, its checksum is verified,
  # a symlink payload (mode 120000) has its 'link ' prefix removed, and
  # the content is hashed into the object store.  Without {fh} the baton's
  # existing blob is reused.  The entry is then updated in the index and
  # reported as "\t<action>\t<path>" unless $::_q is set.
  #
  # Ignored paths are skipped.  Always returns undef.
  sub close_file {
      my ($self, $fb, $exp) = @_;
      return undef if $self->is_path_ignored($fb->{path});

      my $hash;
      my $path = $self->git_path($fb->{path});

      if (my $fh = $fb->{fh}) {
          if (defined $exp) {
              seek($fh, 0, 0) or croak $!;
              my $got = ::md5sum($fh);
              unless ($got eq $exp) {
                  die "Checksum mismatch: $path\n",
                      "expected: $exp\n got: $got\n";
              }
          }

          if ($fb->{mode_b} == 120000) {
              sysseek($fh, 0, 0) or croak $!;
              my $rd = sysread($fh, my $buf, 5);

              if (!defined $rd) {
                  croak "sysread: $!\n";
              }
              elsif ($rd == 0) {
                  warn "$path has mode 120000",
                       " but it points to nothing\n",
                       "converting to an empty file with mode",
                       " 100644\n";
                  $fb->{mode_b} = '100644';
              }
              elsif ($buf ne 'link ') {
                  warn "$path has mode 120000",
                       " but is not a link\n";
              }
              else {
                  # Copy everything after the 'link ' prefix into the
                  # 'svn_hash' temp file and hash that instead.
                  my $tmp_fh = $::_repository->temp_acquire('svn_hash');
                  my $res;
                  while ($res = sysread($fh, my $str, 1024)) {
                      my $out = syswrite($tmp_fh, $str, $res);
                      unless (defined($out) && $out == $res) {
                          croak("write ",
                                Git::temp_path($tmp_fh),
                                ": $!\n");
                      }
                  }
                  defined $res or croak $!;
                  # From here on $fh is the stripped copy; the original
                  # handle is released.
                  ($fh, $tmp_fh) = ($tmp_fh, $fh);
                  Git::temp_release($tmp_fh, 1);
              }
          }

          $hash = $::_repository->hash_and_insert_object(
              Git::temp_path($fh));
          $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";

          Git::temp_release($fb->{base}, 1);
          Git::temp_release($fh, 1);
      }
      else {
          $hash = $fb->{blob};
          die "no blob information\n" unless $hash;
      }

      $fb->{pool}->clear;
      $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
      if ($fb->{action} && !$::_q) {
          print "\t$fb->{action}\t$path\n";
      }
      return undef;
  }
  # Editor callback: the edit was aborted.  Copies the index helper's {nr}
  # into $self->{nr}, drops the helper, then defers to the parent class
  # with the remaining arguments.
  sub abort_edit {
      my ($self, @args) = @_;
      $self->{nr} = $self->{gii}->{nr};
      delete $self->{gii};
      return $self->SUPER::abort_edit(@args);
  }
  # Editor callback: the edit completed.
  #
  # With $_preserve_empty_dirs set, a placeholder file is added to every
  # directory found to be empty and the placeholder list is saved.  Then
  # {git_commit_ok} is set, the index helper's {nr} is copied into
  # $self->{nr}, the helper is dropped, and the parent class is called
  # with the remaining arguments.
  sub close_edit {
      my ($self, @args) = @_;

      if ($_preserve_empty_dirs) {
          # Any entry in {empty} that also has an associated dir_prop
          # represents a newly created empty directory.
          my @empty_dirs =
              grep { exists $self->{dir_prop}->{$_} } keys %{$self->{empty}};

          # Search for directories that have become empty due to
          # subsequent file deletes.
          push @empty_dirs, $self->find_empty_directories();

          # Finally, add a placeholder file to each empty directory.
          for my $dir (@empty_dirs) {
              $self->add_placeholder_file($dir);
          }

          $self->stash_placeholder_list();
      }

      $self->{git_commit_ok} = 1;
      $self->{nr} = $self->{gii}->{nr};
      delete $self->{gii};
      return $self->SUPER::close_edit(@args);
  }
  # Work out which directories became empty through the deletions recorded
  # in @deleted_gpath.
  #
  # Each distinct parent directory of a deleted path (other than ".") is
  # listed in commit $self->{c}; it counts as empty when every name listed
  # there is itself in @deleted_gpath.
  #
  # Returns the list of empty directories.
  sub find_empty_directories {
      my ($self) = @_;
      my @empty_dirs;
      my %dirs = map { dirname($_) => 1 } @deleted_gpath;

      DIR:
      foreach my $dir (sort keys %dirs) {
          next DIR if $dir eq ".";

          # If there have been any additions to this directory, there is
          # no reason to check if it is empty.
          # NOTE(review): this test never looks at $dir; it skips whenever
          # any recorded property path has its parent recorded in the same
          # hash.  Confirm this is the intended condition.
          foreach my $t (qw/dir_prop file_prop/) {
              foreach my $path (keys %{ $self->{$t} }) {
                  next DIR if exists $self->{$t}->{dirname($path)};
              }
          }

          # Use `git ls-tree` to get the filenames of this directory
          # that existed prior to this particular commit.
          my $ls = command('ls-tree', '-z', '--name-only',
                           $self->{c}, "$dir/");
          my %files = map { $_ => 1 } split(/\0/, $ls);

          # Remove the filenames that were deleted during this commit.
          delete @files{@deleted_gpath};

          # Report the directory if there are no filenames left.
          push @empty_dirs, $dir unless (scalar %files);
      }

      return @empty_dirs;
  }
  # Add the placeholder file "$dir/$_placeholder_filename" to the index.
  #
  # The content hashed is that of a temp file acquired under the
  # placeholder's git path.  Afterwards $dir is removed from {empty} and
  # the placeholder is remembered in %added_placeholder.
  sub add_placeholder_file {
      my ($self, $dir) = @_;
      my $placeholder = "$dir/$_placeholder_filename";
      my $gpath = $self->git_path($placeholder);

      my $tmp = $::_repository->temp_acquire($gpath);
      my $hash = $::_repository->hash_and_insert_object(Git::temp_path($tmp));
      Git::temp_release($tmp, 1);
      $self->{gii}->update('100644', $hash, $gpath) or croak $!;

      # The directory should no longer be considered empty.
      if (exists $self->{empty}->{$dir}) {
          delete $self->{empty}->{$dir};
      }

      # Keep track of any placeholder files we create.
      $added_placeholder{$dir} = $placeholder;
  }
  # Save the placeholder paths held in %added_placeholder to the
  # "svn-remote.$repo_id.added-placeholder" config key: existing values
  # are unset first, then one value is added per placeholder.
  sub stash_placeholder_list {
      my ($self) = @_;
      my $key = "svn-remote.$repo_id.added-placeholder";

      # eval: a failing lookup just means there is nothing to unset.
      my $existing = eval { command_oneline('config', '--get-all', $key) };
      if ($existing) {
          command_noisy('config', '--unset-all', $key);
      }

      for my $placeholder (values %added_placeholder) {
          command_noisy('config', '--add', $key, $placeholder);
      }
  }
  466. 1;
  467. __END__
  468. =head1 NAME
  469. Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
  470. =head1 SYNOPSIS
  471. use SVN::Core;
  472. use SVN::Ra;
  473. use Git::SVN;
  474. use Git::SVN::Fetcher;
  475. use Git;
  476. my $gs = Git::SVN->find_by_url($url);
  477. my $ra = SVN::Ra->new(url => $url);
  478. my $editor = Git::SVN::Fetcher->new($gs);
  479. my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
  480. 1, $editor);
  481. $reporter->set_path('', $old_rev, 0);
  482. $reporter->finish_report;
  483. my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
  484. foreach my $path (keys %{$editor->{dir_prop}}) {
  485. my $props = $editor->{dir_prop}{$path};
  486. foreach my $prop (keys %$props) {
  487. print "property $prop at $path changed to $props->{$prop}\n";
  488. }
  489. }
  490. foreach my $path (keys %{$editor->{empty}}) {
  491. my $action = $editor->{empty}{$path} ? 'added' : 'removed';
  492. print "empty directory $path $action\n";
  493. }
  494. foreach my $path (keys %{$editor->{file_prop}}) { ... }
  495. foreach my $parent (keys %{$editor->{absent_dir}}) {
  496. my @children = @{$editor->{absent_dir}{$parent}};
  497. print "cannot fetch directory $parent/$_: not authorized?\n"
  498. foreach @children;
  499. }
  500. foreach my $parent (keys %{$editor->{absent_file}}) { ... }
  501. =head1 DESCRIPTION
  502. This is a subclass of C<SVN::Delta::Editor>, which means it implements
  503. callbacks to act as a consumer of Subversion tree deltas. This
  504. particular implementation of those callbacks is meant to store
  505. information about the resulting content which B<git svn fetch> could
  506. use to populate new commits and new entries for F<unhandled.log>.
  507. More specifically:
  508. =over
  509. =item * Additions, removals, and modifications of files are propagated
  510. to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
  511. B<git update-index>.
  512. =item * Changes in Subversion path properties are recorded in the
  513. C<dir_prop> and C<file_prop> fields (which are hashes).
  514. =item * Addition and removal of empty directories are indicated by
  515. entries with value 1 and 0 respectively in the C<empty> hash.
  516. =item * Paths that are present but cannot be conveyed (presumably due
  517. to permissions) are recorded in the C<absent_file> and
  518. C<absent_dir> hashes. For each key, the corresponding value is
  519. a list of paths under that directory that were present but
  520. could not be conveyed.
  521. =back
  522. The interface is unstable. Do not use this module unless you are
  523. developing git-svn.
  524. =head1 DEPENDENCIES
  525. L<SVN::Delta> from the Subversion perl bindings,
  526. the core L<Carp> and L<File::Basename> modules,
  527. and git's L<Git> helper module.
  528. C<Git::SVN::Fetcher> has not been tested using callers other than
  529. B<git-svn> itself.
  530. =head1 SEE ALSO
  531. L<SVN::Delta>,
  532. L<Git::SVN::Editor>.
  533. =head1 INCOMPATIBILITIES
  534. None reported.
  535. =head1 BUGS
  536. None.