PageRenderTime 51ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/perl/Git/SVN/Fetcher.pm

https://github.com/abInitioSKim/git
Perl | 620 lines | 509 code | 77 blank | 34 comment | 80 complexity | 21f84c18e282e182a04288411d2a10b4 MD5 | raw file
Possible License(s): Apache-2.0, BSD-2-Clause, GPL-2.0, LGPL-2.1
  1. package Git::SVN::Fetcher;
  2. use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs
  3. $_placeholder_filename @deleted_gpath %added_placeholder
  4. $repo_id/;
  5. use strict;
  6. use warnings;
  7. use SVN::Delta;
  8. use Carp qw/croak/;
  9. use File::Basename qw/dirname/;
  10. use IO::File qw//;
  11. use Git qw/command command_oneline command_noisy command_output_pipe
  12. command_input_pipe command_close_pipe
  13. command_bidi_pipe command_close_bidi_pipe/;
  14. BEGIN {
  15. @ISA = qw(SVN::Delta::Editor);
  16. }
  17. # file baton members: path, mode_a, mode_b, pool, fh, blob, base, action
  # Constructor: builds the delta editor used while fetching from SVN.
  #
  # Arguments:
  #   $class       - package name to bless the editor into
  #   $git_svn     - object supplying {repo_id}, optionally {last_commit},
  #                  and the tmp_index_do() method
  #   $switch_path - optional path, forwarded to _mark_empty_symlinks()
  #
  # Returns the blessed editor. As a side effect the package globals
  # $repo_id, $_preserve_empty_dirs, $_placeholder_filename and
  # %added_placeholder may be updated from the repository configuration.
  sub new {
      my ($class, $git_svn, $switch_path) = @_;
      my $self = SVN::Delta::Editor->new;
      bless $self, $class;
      if (exists $git_svn->{last_commit}) {
          $self->{c} = $git_svn->{last_commit};
          $self->{empty_symlinks} =
              _mark_empty_symlinks($git_svn, $switch_path);
      }

      # some options are read globally, but can be overridden locally
      # per [svn-remote "..."] section. Command-line options will *NOT*
      # override options set in an [svn-remote "..."] section
      $repo_id = $git_svn->{repo_id};
      my $k = "svn-remote.$repo_id.ignore-paths";
      my $v = eval { command_oneline('config', '--get', $k) };
      $self->{ignore_regex} = $v;

      $k = "svn-remote.$repo_id.include-paths";
      $v = eval { command_oneline('config', '--get', $k) };
      $self->{include_regex} = $v;

      $k = "svn-remote.$repo_id.preserve-empty-dirs";
      $v = eval { command_oneline('config', '--get', '--bool', $k) };
      if ($v && $v eq 'true') {
          $_preserve_empty_dirs = 1;
          $k = "svn-remote.$repo_id.placeholder-filename";
          $v = eval { command_oneline('config', '--get', $k) };
          # Only override the global when the key is really configured.
          # Assigning an undefined or empty value would clobber whatever
          # name was already in effect and make add_placeholder_file()
          # build the path "$dir/" with no file name.
          $_placeholder_filename = $v if defined $v && length $v;
      }

      # Load the list of placeholder files added during previous invocations.
      $k = "svn-remote.$repo_id.added-placeholder";
      $v = eval { command_oneline('config', '--get-all', $k) };
      if ($_preserve_empty_dirs && $v) {
          # command() prints errors to stderr, so we only call it if
          # command_oneline() succeeded.
          my @v = command('config', '--get-all', $k);
          $added_placeholder{ dirname($_) } = $_ foreach @v;
      }

      # Per-edit bookkeeping filled in by the editor callbacks.
      $self->{empty} = {};
      $self->{dir_prop} = {};
      $self->{file_prop} = {};
      $self->{absent_dir} = {};
      $self->{absent_file} = {};

      require Git::IndexInfo;
      $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
      $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
      $self;
  }
  # Collect the paths of empty blobs in the last fetched commit that SVN
  # marks with the svn:special property.
  #
  # this uses the Ra object, so it must be called before do_{switch,update},
  # not inside them (when the Git::SVN::Fetcher object is passed) to
  # do_{switch,update}
  #
  # Returns a hash reference whose keys are the matching paths; the hash is
  # empty when svn.brokenSymlinkWorkaround is unset or false, or when there
  # is no previous revision/commit.
  sub _mark_empty_symlinks {
      my ($git_svn, $switch_path) = @_;

      # An undefined setting and an explicit false both disable the scan.
      my $enabled = Git::config_bool('svn.brokenSymlinkWorkaround');
      return {} unless $enabled;

      my %symlink_for;
      my ($rev, $cmt) = $git_svn->last_rev_commit;
      return {} unless ($rev && $cmt);

      # allow the warning to be printed for each revision we fetch to
      # ensure the user sees it. The user can also disable the workaround
      # on the repository even while git svn is running and the next
      # revision fetched will skip this expensive function.
      my $warned;
      chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
      my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
      local $/ = "\0";

      my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
      $pfx .= '/' if length($pfx);

      while (defined(my $entry = <$ls>)) {
          chomp $entry;
          # Keep only regular files whose content is the empty blob; the
          # substitution leaves just the path.
          $entry =~ s/\A100644 blob $empty_blob\t//o or next;
          if (!$warned) {
              print STDERR "Scanning for empty symlinks, ",
                           "this may take a while if you have ",
                           "many empty files\n",
                           "You may disable this with `",
                           "git config svn.brokenSymlinkWorkaround ",
                           "false'.\n",
                           "This may be done in a different ",
                           "terminal without restarting ",
                           "git svn\n";
              $warned = 1;
          }
          my (undef, $props) =
              $git_svn->ra->get_file($pfx . $entry, $rev, undef);
          $symlink_for{$entry} = 1 if $props->{'svn:special'};
      }
      command_close_pipe($ls, $ctx);
      return \%symlink_for;
  }
  # Tells whether $path has a path component that is exactly ".git",
  # i.e. whether it is a ".git" directory or lies inside one.
  sub in_dot_git {
      my ($path) = @_;
      return $path =~ m{(?:^|/)\.git(?:/|$)};
  }
  # Decide whether $path must be skipped by the fetcher.
  # return value: 0 -- don't ignore, 1 -- ignore
  #
  # Order of precedence:
  #   1. anything inside a ".git" directory is ignored;
  #   2. a match of the per-remote ignore regex ignores the path;
  #   3. if any include regex (per-remote or global) is defined, the path
  #      is kept only when it matches one of them;
  #   4. otherwise the global ignore regex, if defined, decides.
  sub is_path_ignored {
      my ($self, $path) = @_;
      return 1 if in_dot_git($path);

      my $remote_ignore = $self->{ignore_regex};
      return 1 if defined($remote_ignore) && $path =~ m!$remote_ignore!;

      my $remote_include = $self->{include_regex};
      if (defined($remote_include) || defined($_include_regex)) {
          return 0 if defined($remote_include) &&
                      $path =~ m!$remote_include!;
          return 0 if defined($_include_regex) &&
                      $path =~ m!$_include_regex!;
          # An include list exists and the path is not on it.
          return 1;
      }

      return 0 unless defined($_ignore_regex);
      return ($path =~ m!$_ignore_regex!o) ? 1 : 0;
  }
  # Remember a compiled pattern that matches $path as a leading prefix
  # (followed by "/" or the end of the string). Nothing is stored when
  # $path is empty.
  sub set_path_strip {
      my ($self, $path) = @_;
      if (length $path) {
          $self->{path_strip} = qr/^\Q$path\E(\/|$)/;
      }
  }
  # Editor callback for the root directory: its baton is a hash reference
  # whose path is the empty string.
  sub open_root {
      my %baton = (path => '');
      return \%baton;
  }
  # Editor callback for an existing directory: the baton is a hash
  # reference carrying only the directory path. The parent baton and
  # revision arguments are accepted but not used.
  sub open_directory {
      my (undef, $path) = @_;
      my %baton = (path => $path);
      return \%baton;
  }
  # Translate an SVN path into the path used on the git side.
  #
  # When {pathnameencoding} is set the path is converted from UTF-8 to
  # that encoding; when {path_strip} is set the matching prefix is
  # removed. Dies if a strip pattern is set but does not match.
  sub git_path {
      my ($self, $path) = @_;

      my $enc = $self->{pathnameencoding};
      if ($enc) {
          require Encode;
          Encode::from_to($path, 'UTF-8', $enc);
      }

      my $strip = $self->{path_strip};
      return $path unless $strip;

      $path =~ s!$strip!!
          or die "Failed to strip path '$path' ($strip)\n";
      return $path;
  }
  # Editor callback: remove $path from the temporary index.
  #
  # If the path names a tree in commit {c}, every file below it is removed
  # individually; otherwise the single entry is removed. Each removal is
  # reported on stdout unless $::_q is set. The git path is remembered in
  # @deleted_gpath (unless the directory holds one of our placeholders)
  # and the path is flagged 0 in {empty}. Always returns undef.
  sub delete_entry {
      my ($self, $path, $rev, $pb) = @_;
      return undef if $self->is_path_ignored($path);

      my $gpath = $self->git_path($path);
      return undef if ($gpath eq '');

      my $index = $self->{gii};

      # remove entire directories.
      my $listing = command('ls-tree', '-z', $self->{c}, "./$gpath");
      my ($tree) =
          ($listing =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
      if (!$tree) {
          $index->remove($gpath);
          print "\tD\t$gpath\n" unless $::_q;
      } else {
          my ($ls, $ctx) = command_output_pipe(qw/ls-tree
                                               -r --name-only -z/,
                                               $tree);
          local $/ = "\0";
          while (defined(my $name = <$ls>)) {
              chomp $name;
              my $rmpath = "$gpath/$name";
              $index->remove($rmpath);
              print "\tD\t$rmpath\n" unless $::_q;
          }
          print "\tD\t$gpath/\n" unless $::_q;
          command_close_pipe($ls, $ctx);
      }

      # Don't add to @deleted_gpath if we're deleting a placeholder file.
      if (!$added_placeholder{dirname($path)}) {
          push @deleted_gpath, $gpath;
      }
      $self->{empty}->{$path} = 0;
      return undef;
  }
  # Editor callback for an existing file: build its file baton.
  #
  # For paths that are not ignored, the mode and blob id are looked up in
  # commit {c}; the call dies when the file is not found there. A 100644
  # entry listed in {empty_symlinks} is treated as a symlink (120000).
  # Ignored paths get a baton with undefined mode and blob.
  sub open_file {
      my ($self, $path, $pb, $rev) = @_;
      my ($mode, $blob);

      unless ($self->is_path_ignored($path)) {
          my $gpath = $self->git_path($path);
          my $listing = command('ls-tree', '-z', $self->{c}, "./$gpath");
          ($mode, $blob) =
              ($listing =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
          defined $mode && defined $blob
              or die "$path was not found in commit $self->{c} (r$rev)\n";
          $mode = '120000'
              if $mode eq '100644' && $self->{empty_symlinks}->{$path};
      }

      return {
          path   => $path,
          mode_a => $mode,
          mode_b => $mode,
          blob   => $blob,
          pool   => SVN::Pool->new,
          action => 'M',
      };
  }
  # Editor callback for a newly added file: build its file baton.
  #
  # For paths that are not ignored, the parent directory stops being
  # tracked in {empty}, the mode starts as 100644, and a placeholder file
  # previously created in that directory is deleted (unless the new file
  # *is* that placeholder) and forgotten.
  sub add_file {
      my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
      my $mode;

      unless ($self->is_path_ignored($path)) {
          $mode = '100644';
          my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
          delete $self->{empty}->{$dir};

          my $placeholder = $added_placeholder{$dir};
          if ($placeholder) {
              # Remove our placeholder file, if we created one.
              delete_entry($self, $placeholder)
                  if $path ne $placeholder;
              delete $added_placeholder{$dir};
          }
      }

      return {
          path   => $path,
          mode_a => $mode,
          mode_b => $mode,
          pool   => SVN::Pool->new,
          action => 'A',
      };
  }
  # Editor callback for a newly added directory; returns its baton.
  #
  # When the directory maps to the top of the git tree (empty git path),
  # every file of commit {c} is first removed from the index. The new
  # directory is then flagged 1 in {empty}, its parent is dropped from
  # {empty}, and any placeholder file we created in the parent is deleted.
  sub add_directory {
      my ($self, $path, $cp_path, $cp_rev) = @_;
      return { path => $path } if $self->is_path_ignored($path);

      my $gpath = $self->git_path($path);
      if ($gpath eq '') {
          my ($ls, $ctx) = command_output_pipe(qw/ls-tree
                                               -r --name-only -z/,
                                               $self->{c});
          local $/ = "\0";
          while (defined(my $name = <$ls>)) {
              chomp $name;
              $self->{gii}->remove($name);
              print "\tD\t$name\n" unless $::_q;
              # NOTE(review): this records $gpath (the empty string here),
              # not the removed file name -- confirm that is intended.
              push @deleted_gpath, $gpath;
          }
          command_close_pipe($ls, $ctx);
          $self->{empty}->{$path} = 0;
      }

      my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
      delete $self->{empty}->{$dir};
      $self->{empty}->{$path} = 1;

      my $placeholder = $added_placeholder{$dir};
      if ($placeholder) {
          # Remove our placeholder file, if we created one.
          delete_entry($self, $placeholder);
          delete $added_placeholder{$dir};
      }

      return { path => $path };
  }
  # Editor callback: record a property change on a directory in
  # {dir_prop}{<path>}{<prop>}. Ignored paths are skipped.
  # Always returns undef.
  sub change_dir_prop {
      my ($self, $db, $prop, $value) = @_;
      my $dir = $db->{path};
      return undef if $self->is_path_ignored($dir);

      my $props = ($self->{dir_prop}->{$dir} ||= {});
      $props->{$prop} = $value;
      return undef;
  }
  # Editor callback: note a directory that exists but was not conveyed.
  # The path is appended to the list kept in {absent_dir} under the parent
  # baton's path. Ignored paths are skipped. Always returns undef.
  sub absent_directory {
      my ($self, $path, $pb) = @_;
      return undef if $self->is_path_ignored($path);

      my $children = ($self->{absent_dir}->{$pb->{path}} ||= []);
      push @$children, $path;
      return undef;
  }
  # Editor callback: note a file that exists but was not conveyed.
  # The path is appended to the list kept in {absent_file} under the
  # parent baton's path. Ignored paths are skipped. Always returns undef.
  sub absent_file {
      my ($self, $path, $pb) = @_;
      return undef if $self->is_path_ignored($path);

      my $children = ($self->{absent_file}->{$pb->{path}} ||= []);
      push @$children, $path;
      return undef;
  }
  # Editor callback: apply a property change to a file baton.
  #
  #   svn:special    - mode_b becomes 120000 when set, 100644 when removed
  #   svn:executable - mode_b becomes 100755 when set, 100644 when removed,
  #                    but a 120000 mode is left alone
  #   anything else  - stored in {file_prop}{<path>}{<prop>}
  #
  # Ignored paths are skipped. Always returns undef.
  sub change_file_prop {
      my ($self, $fb, $prop, $value) = @_;
      my $path = $fb->{path};
      return undef if $self->is_path_ignored($path);

      if ($prop eq 'svn:special') {
          $fb->{mode_b} = defined $value ? 120000 : 100644;
      } elsif ($prop eq 'svn:executable') {
          unless ($fb->{mode_b} == 120000) {
              $fb->{mode_b} = defined $value ? 100755 : 100644;
          }
      } else {
          my $props = ($self->{file_prop}->{$path} ||= {});
          $props->{$prop} = $value;
      }
      return undef;
  }
  # Editor callback: prepare to receive a text delta for a file.
  #
  # Acquires two temp files: one that will receive the delta result
  # (stored in the baton as {fh}) and one holding the base content
  # (stored as {base}). When the baton names a base blob it is written to
  # the base file, prefixed with "link " for symlinks, and checked against
  # the expected MD5 $exp when one is given. A mismatch on a symlink base
  # is retried once without the prefix; any other mismatch is fatal.
  #
  # Returns an array reference holding whatever SVN::TxDelta::apply()
  # returns, or undef for ignored paths.
  sub apply_textdelta {
      my ($self, $fb, $exp) = @_;
      return undef if $self->is_path_ignored($fb->{path});

      # Find a temp-file suffix that is not currently locked.
      my $suffix = 0;
      $suffix++
          while $::_repository->temp_is_locked("svn_delta_${$}_$suffix");
      my $fh = $::_repository->temp_acquire("svn_delta_${$}_$suffix");

      # $fh gets auto-closed() by SVN::TxDelta::apply(),
      # (but $base does not,) so dup() it for reading in close_file
      open my $dup, '<&', $fh or croak $!;
      my $base = $::_repository->temp_acquire("git_blob_${$}_$suffix");

      if ($fb->{blob}) {
          my $base_is_link = 0;
          if ($fb->{mode_a} eq '120000' &&
              ! $self->{empty_symlinks}->{$fb->{path}}) {
              print $base 'link ' or die "print $!\n";
              $base_is_link = 1;
          }

          # At most two passes: the second only after a failed checksum
          # on a base that was written with the "link " prefix.
          while (1) {
              my $size = $::_repository->cat_blob($fb->{blob}, $base);
              die "Failed to read object $fb->{blob}" if ($size < 0);
              last unless defined $exp;

              seek $base, 0, 0 or croak $!;
              my $got = ::md5sum($base);
              last if $got eq $exp;

              my $err = "Checksum mismatch: ".
                        "$fb->{path} $fb->{blob}\n" .
                        "expected: $exp\n" .
                        " got: $got\n";
              die $err unless $base_is_link;

              warn $err,
                   "Retrying... (possibly ",
                   "a bad symlink from SVN)\n";
              $::_repository->temp_reset($base);
              $base_is_link = 0;
          }
      }

      seek $base, 0, 0 or croak $!;
      $fb->{fh} = $fh;
      $fb->{base} = $base;
      return [ SVN::TxDelta::apply($base, $dup, undef,
                                   $fb->{path}, $fb->{pool}) ];
  }
  # Editor callback: finish a file and record it in the temporary index.
  #
  # If apply_textdelta() produced new content ({fh} in the baton) it is
  # verified against the expected MD5 $exp (when given), the "link "
  # prefix is stripped from symlink content, and the result is hashed into
  # the object database. Otherwise the blob already named in the baton is
  # reused. The index entry is then updated with mode_b and the action is
  # printed unless $::_q is set. Always returns undef.
  sub close_file {
      my ($self, $fb, $exp) = @_;
      return undef if $self->is_path_ignored($fb->{path});

      my $hash;
      my $path = $self->git_path($fb->{path});
      my $fh = $fb->{fh};
      if ($fh) {
          if (defined $exp) {
              seek($fh, 0, 0) or croak $!;
              my $got = ::md5sum($fh);
              $got eq $exp
                  or die "Checksum mismatch: $path\n",
                         "expected: $exp\n got: $got\n";
          }

          if ($fb->{mode_b} == 120000) {
              sysseek($fh, 0, 0) or croak $!;
              my $rd = sysread($fh, my $buf, 5);
              croak "sysread: $!\n" unless defined $rd;

              if ($rd == 0) {
                  warn "$path has mode 120000",
                       " but it points to nothing\n",
                       "converting to an empty file with mode",
                       " 100644\n";
                  $fb->{mode_b} = '100644';
              } elsif ($buf ne 'link ') {
                  warn "$path has mode 120000",
                       " but is not a link\n";
              } else {
                  # Copy everything after the "link " prefix into a fresh
                  # temp file and hash that one instead.
                  my $link_fh = $::_repository->temp_acquire(
                      'svn_hash');
                  my $res;
                  while ($res = sysread($fh, my $str, 1024)) {
                      my $out = syswrite($link_fh, $str, $res);
                      defined($out) && $out == $res
                          or croak("write ",
                                   Git::temp_path($link_fh),
                                   ": $!\n");
                  }
                  defined $res or croak $!;
                  Git::temp_release($fh, 1);
                  $fh = $link_fh;
              }
          }

          $hash = $::_repository->hash_and_insert_object(
              Git::temp_path($fh));
          $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";

          Git::temp_release($fb->{base}, 1);
          Git::temp_release($fh, 1);
      } else {
          $hash = $fb->{blob} or die "no blob information\n";
      }

      $fb->{pool}->clear;
      $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
      if ($fb->{action} && ! $::_q) {
          print "\t$fb->{action}\t$path\n";
      }
      return undef;
  }
  # Editor callback for an aborted edit: keep the index helper's {nr}
  # counter in {nr}, drop the helper, and delegate to the parent class
  # with the remaining arguments.
  sub abort_edit {
      my ($self, @args) = @_;
      $self->{nr} = $self->{gii}->{nr};
      delete $self->{gii};
      $self->SUPER::abort_edit(@args);
  }
  # Editor callback for a completed edit.
  #
  # When empty directories are being preserved, a placeholder file is
  # added to every directory found empty and the placeholder list is
  # saved to the configuration. The edit is then marked as committable,
  # the index helper's {nr} counter is kept in {nr}, the helper is
  # dropped, and the parent class is called with the remaining arguments.
  sub close_edit {
      my ($self, @args) = @_;

      if ($_preserve_empty_dirs) {
          # Any entry flagged as empty that also has an associated
          # dir_prop represents a newly created empty directory.
          my @empty_dirs = grep { exists $self->{dir_prop}->{$_} }
                           keys %{$self->{empty}};

          # Search for directories that have become empty due to
          # subsequent file deletes.
          push @empty_dirs, $self->find_empty_directories();

          # Finally, add a placeholder file to each empty directory.
          foreach my $dir (@empty_dirs) {
              $self->add_placeholder_file($dir);
          }
          $self->stash_placeholder_list();
      }

      $self->{git_commit_ok} = 1;
      $self->{nr} = $self->{gii}->{nr};
      delete $self->{gii};
      $self->SUPER::close_edit(@args);
  }
  # Return the directories that lost entries during this edit and have
  # nothing left in commit {c} once the deleted paths are discounted.
  # Candidates are the parent directories of everything in @deleted_gpath;
  # "." is never reported.
  sub find_empty_directories {
      my ($self) = @_;
      my @empty_dirs;
      my %dirs = map { dirname($_) => 1 } @deleted_gpath;

      DIR:
      foreach my $dir (sort keys %dirs) {
          next DIR if $dir eq ".";

          # If there have been any additions to this directory, there is
          # no reason to check if it is empty.
          # NOTE(review): the test below never looks at $dir; it fires
          # whenever any path with recorded properties has a parent that
          # also has recorded properties -- confirm this is intended.
          foreach my $t (qw/dir_prop file_prop/) {
              foreach my $path (keys %{ $self->{$t} }) {
                  next DIR if exists $self->{$t}->{dirname($path)};
              }
          }

          # Use `git ls-tree` to get the filenames of this directory
          # that existed prior to this particular commit.
          my $ls = command('ls-tree', '-z', '--name-only',
                           $self->{c}, "$dir/");
          my %files = map { $_ => 1 } split(/\0/, $ls);

          # Remove the filenames that were deleted during this commit.
          delete @files{@deleted_gpath};

          # Report the directory if there are no filenames left.
          push @empty_dirs, $dir unless %files;
      }
      @empty_dirs;
  }
  # Add an empty placeholder file named $_placeholder_filename to $dir in
  # the temporary index, stop tracking $dir in {empty}, and remember the
  # placeholder path in %added_placeholder.
  sub add_placeholder_file {
      my ($self, $dir) = @_;
      my $path = "$dir/$_placeholder_filename";
      my $gpath = $self->git_path($path);

      # Hash a freshly acquired temp file to obtain the blob id.
      my $repo = $::_repository;
      my $fh = $repo->temp_acquire($gpath);
      my $hash = $repo->hash_and_insert_object(Git::temp_path($fh));
      Git::temp_release($fh, 1);
      $self->{gii}->update('100644', $hash, $gpath) or croak $!;

      # The directory should no longer be considered empty.
      delete $self->{empty}->{$dir};

      # Keep track of any placeholder files we create.
      $added_placeholder{$dir} = $path;
  }
  # Save the current placeholder list to the repository configuration:
  # existing svn-remote.<repo_id>.added-placeholder values are removed,
  # then one value is added per entry of %added_placeholder.
  sub stash_placeholder_list {
      my ($self) = @_;
      my $k = "svn-remote.$repo_id.added-placeholder";

      # --unset-all is only run when there is something to remove.
      my $v = eval { command_oneline('config', '--get-all', $k) };
      if ($v) {
          command_noisy('config', '--unset-all', $k);
      }

      foreach my $placeholder (values %added_placeholder) {
          command_noisy('config', '--add', $k, $placeholder);
      }
  }
  464. 1;
  465. __END__
  466. =head1 NAME
  467. Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
  468. =head1 SYNOPSIS
  469. use SVN::Core;
  470. use SVN::Ra;
  471. use Git::SVN;
  472. use Git::SVN::Fetcher;
  473. use Git;
  474. my $gs = Git::SVN->find_by_url($url);
  475. my $ra = SVN::Ra->new(url => $url);
  476. my $editor = Git::SVN::Fetcher->new($gs);
  477. my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
  478. 1, $editor);
  479. $reporter->set_path('', $old_rev, 0);
  480. $reporter->finish_report;
  481. my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
  482. foreach my $path (keys %{$editor->{dir_prop}}) {
  483. my $props = $editor->{dir_prop}{$path};
  484. foreach my $prop (keys %$props) {
  485. print "property $prop at $path changed to $props->{$prop}\n";
  486. }
  487. }
  488. foreach my $path (keys %{$editor->{empty}}) {
  489. my $action = $editor->{empty}{$path} ? 'added' : 'removed';
  490. print "empty directory $path $action\n";
  491. }
  492. foreach my $path (keys %{$editor->{file_prop}}) { ... }
  493. foreach my $parent (keys %{$editor->{absent_dir}}) {
  494. my @children = @{$editor->{absent_dir}{$parent}};
  495. print "cannot fetch directory $parent/$_: not authorized?\n"
  496. foreach @children;
  497. }
  498. foreach my $parent (keys %{$editor->{absent_file}}) { ... }
  499. =head1 DESCRIPTION
  500. This is a subclass of C<SVN::Delta::Editor>, which means it implements
  501. callbacks to act as a consumer of Subversion tree deltas. This
  502. particular implementation of those callbacks is meant to store
  503. information about the resulting content which B<git svn fetch> could
  504. use to populate new commits and new entries for F<unhandled.log>.
  505. More specifically:
  506. =over
  507. =item * Additions, removals, and modifications of files are propagated
  508. to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
  509. B<git update-index>.
  510. =item * Changes in Subversion path properties are recorded in the
  511. C<dir_prop> and C<file_prop> fields (which are hashes).
  512. =item * Addition and removal of empty directories are indicated by
  513. entries with value 1 and 0 respectively in the C<empty> hash.
  514. =item * Paths that are present but cannot be conveyed (presumably due
  515. to permissions) are recorded in the C<absent_file> and
  516. C<absent_dir> hashes. For each key, the corresponding value is
  517. a list of paths under that directory that were present but
  518. could not be conveyed.
  519. =back
  520. The interface is unstable. Do not use this module unless you are
  521. developing git-svn.
  522. =head1 DEPENDENCIES
  523. L<SVN::Delta> from the Subversion perl bindings,
  524. the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
  525. and git's L<Git> helper module.
  526. C<Git::SVN::Fetcher> has not been tested using callers other than
  527. B<git-svn> itself.
  528. =head1 SEE ALSO
  529. L<SVN::Delta>,
  530. L<Git::SVN::Editor>.
  531. =head1 INCOMPATIBILITIES
  532. None reported.
  533. =head1 BUGS
  534. None.