PageRenderTime 77ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/perl/Git/SVN/Fetcher.pm

https://github.com/rrimando/git
Perl | 604 lines | 496 code | 75 blank | 33 comment | 75 complexity | cb5e3ac5a6aff073c7bdab9382c2305c MD5 | raw file
Possible License(s): LGPL-2.1, Apache-2.0, BSD-2-Clause, GPL-2.0
  1. package Git::SVN::Fetcher;
  2. use vars qw/@ISA $_ignore_regex $_preserve_empty_dirs $_placeholder_filename
  3. @deleted_gpath %added_placeholder $repo_id/;
  4. use strict;
  5. use warnings;
  6. use SVN::Delta;
  7. use Carp qw/croak/;
  8. use File::Basename qw/dirname/;
  9. use IO::File qw//;
  10. use Git qw/command command_oneline command_noisy command_output_pipe
  11. command_input_pipe command_close_pipe
  12. command_bidi_pipe command_close_bidi_pipe/;
  13. BEGIN {
  14. @ISA = qw(SVN::Delta::Editor);
  15. }
  16. # file baton members: path, mode_a, mode_b, pool, fh, blob, base
  # Construct a fetcher: an SVN::Delta::Editor re-blessed into $class.
  #
  # Arguments:
  #   $class       - class to bless the editor into
  #   $git_svn     - object supplying {repo_id}, tmp_index_do() and,
  #                  optionally, {last_commit}
  #   $switch_path - optional path handed on to _mark_empty_symlinks()
  #
  # Side effects: sets the package globals $repo_id, $_preserve_empty_dirs,
  # $_placeholder_filename and %added_placeholder from git config.
  #
  # Returns the new editor object.
  sub new {
      my ($class, $git_svn, $switch_path) = @_;
      my $self = SVN::Delta::Editor->new;
      bless $self, $class;
      if (exists $git_svn->{last_commit}) {
          $self->{c} = $git_svn->{last_commit};
          $self->{empty_symlinks} =
              _mark_empty_symlinks($git_svn, $switch_path);
      }

      # some options are read globally, but can be overridden locally
      # per [svn-remote "..."] section.  Command-line options will *NOT*
      # override options set in an [svn-remote "..."] section
      $repo_id = $git_svn->{repo_id};
      my $k = "svn-remote.$repo_id.ignore-paths";
      my $v = eval { command_oneline('config', '--get', $k) };
      $self->{ignore_regex} = $v;

      $k = "svn-remote.$repo_id.preserve-empty-dirs";
      $v = eval { command_oneline('config', '--get', '--bool', $k) };
      if ($v && $v eq 'true') {
          $_preserve_empty_dirs = 1;
          $k = "svn-remote.$repo_id.placeholder-filename";
          $v = eval { command_oneline('config', '--get', $k) };
          if (defined $v && length $v) {
              $_placeholder_filename = $v;
          } elsif (!defined $_placeholder_filename ||
                   !length $_placeholder_filename) {
              # No per-remote value and nothing supplied by the caller:
              # never leave the name undefined, otherwise
              # add_placeholder_file() would build the path "$dir/".
              # ".gitignore" is git-svn's documented default.
              $_placeholder_filename = '.gitignore';
          }
      }

      # Load the list of placeholder files added during previous invocations.
      $k = "svn-remote.$repo_id.added-placeholder";
      $v = eval { command_oneline('config', '--get-all', $k) };
      if ($_preserve_empty_dirs && $v) {
          # command() prints errors to stderr, so we only call it if
          # command_oneline() succeeded.
          my @v = command('config', '--get-all', $k);
          $added_placeholder{ dirname($_) } = $_ foreach @v;
      }

      # Per-fetch bookkeeping filled in by the editor callbacks.
      $self->{empty} = {};
      $self->{dir_prop} = {};
      $self->{file_prop} = {};
      $self->{absent_dir} = {};
      $self->{absent_file} = {};
      require Git::IndexInfo;
      $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
      $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
      $self;
  }
  # Collect the empty blobs of the last commit that SVN flags svn:special.
  #
  # This uses the Ra object, so it must be called before do_{switch,update},
  # not inside them (when the Git::SVN::Fetcher object is passed) to
  # do_{switch,update}.
  #
  # Returns a hash reference keyed by tree path; it is empty when
  # svn.brokenSymlinkWorkaround is unset or false, or when
  # last_rev_commit() gives no revision/commit pair.
  sub _mark_empty_symlinks {
      my ($git_svn, $switch_path) = @_;
      my $enabled = Git::config_bool('svn.brokenSymlinkWorkaround');
      return {} unless $enabled;

      my %special_for;
      my ($rev, $cmt) = $git_svn->last_rev_commit;
      return {} if !$rev || !$cmt;

      # allow the warning to be printed for each revision we fetch to
      # ensure the user sees it.  The user can also disable the workaround
      # on the repository even while git svn is running and the next
      # revision fetched will skip this expensive function.
      my $warned;

      # Must run before $/ is changed below: chomp strips the newline here.
      chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
      my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
      local $/ = "\0";

      my $pfx = $switch_path;
      $pfx = $git_svn->path unless defined $pfx;
      $pfx .= '/' if length($pfx);

      while (<$ls>) {
          chomp;
          # Only regular, non-executable empty blobs are candidates.
          next unless s/\A100644 blob $empty_blob\t//o;
          if (!$warned) {
              print STDERR "Scanning for empty symlinks, ",
                           "this may take a while if you have ",
                           "many empty files\n",
                           "You may disable this with `",
                           "git config svn.brokenSymlinkWorkaround ",
                           "false'.\n",
                           "This may be done in a different ",
                           "terminal without restarting ",
                           "git svn\n";
              $warned = 1;
          }
          my $path = $_;
          my (undef, $props) =
              $git_svn->ra->get_file($pfx.$path, $rev, undef);
          $special_for{$path} = 1 if $props->{'svn:special'};
      }
      command_close_pipe($ls, $ctx);
      return \%special_for;
  }
  # True when some component of the given path is exactly ".git".
  sub in_dot_git {
      my ($candidate) = @_;
      return $candidate =~ m{(?:^|/)\.git(?:/|$)};
  }
  # Decide whether an SVN path should be skipped.
  #
  # Returns 1 (ignore) when the path lies inside a ".git" directory, matches
  # the per-object {ignore_regex}, or matches the package-wide
  # $_ignore_regex; returns 0 (don't ignore) otherwise.
  sub is_path_ignored {
      my ($self, $path) = @_;
      return 1 if in_dot_git($path);

      my $local_re = $self->{ignore_regex};
      if (defined($local_re) && $path =~ m!$local_re!) {
          return 1;
      }
      if (defined($_ignore_regex) && $path =~ m!$_ignore_regex!o) {
          return 1;
      }
      return 0;
  }
  # Remember a leading path prefix for git_path() to strip.
  # Nothing is stored when $path is empty.
  sub set_path_strip {
      my ($self, $path) = @_;
      # The prefix must be followed by "/" or the end of the string so that
      # "trunk" does not match "trunkfoo".
      length($path) and $self->{path_strip} = qr/^\Q$path\E(\/|$)/;
  }
  # Editor callback: the root directory baton is a hash with an empty path.
  sub open_root {
      return +{ path => '' };
  }
  # Editor callback: the directory baton is a hash holding only the path.
  # $pb and $rev are accepted but not used.
  sub open_directory {
      my ($self, $path, $pb, $rev) = @_;
      return +{ path => $path };
  }
  # Translate an SVN path into the path used on the git side.
  #
  # When {pathnameencoding} is set, the path is converted in place from
  # UTF-8 to that encoding.  When {path_strip} is set, the matching prefix
  # is removed; a path that does not match is a fatal error.
  #
  # Returns the translated path.
  sub git_path {
      my ($self, $path) = @_;

      my $enc = $self->{pathnameencoding};
      if ($enc) {
          require Encode;
          Encode::from_to($path, 'UTF-8', $enc);
      }

      if ($self->{path_strip}) {
          unless ($path =~ s!$self->{path_strip}!!) {
              die "Failed to strip path '$path' ($self->{path_strip})\n";
          }
      }

      return $path;
  }
  # Editor callback: remove a file, or a whole directory, from the index.
  #
  # Ignored paths and paths that map to the empty git path are left alone.
  # Each removal is reported on stdout unless $::_q is set.  The git path
  # is appended to @deleted_gpath (unless a placeholder is recorded for the
  # parent directory) and {empty}{$path} is set to 0.
  #
  # Always returns undef.
  sub delete_entry {
      my ($self, $path, $rev, $pb) = @_;
      return undef if $self->is_path_ignored($path);

      my $gpath = $self->git_path($path);
      return undef if $gpath eq '';

      # A "040000 tree" entry means $gpath is a directory in commit {c}.
      my $listing = command('ls-tree', '-z', $self->{c}, "./$gpath");
      my ($tree) =
          $listing =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/;

      if (!$tree) {
          $self->{gii}->remove($gpath);
          print "\tD\t$gpath\n" unless $::_q;
      } else {
          # remove entire directories.
          my ($ls, $ctx) = command_output_pipe(qw/ls-tree
                                                  -r --name-only -z/,
                                               $tree);
          local $/ = "\0";
          while (<$ls>) {
              chomp;
              my $rmpath = "$gpath/$_";
              $self->{gii}->remove($rmpath);
              print "\tD\t$rmpath\n" unless $::_q;
          }
          print "\tD\t$gpath/\n" unless $::_q;
          command_close_pipe($ls, $ctx);
      }

      # Don't add to @deleted_gpath if we're deleting a placeholder file.
      unless ($added_placeholder{dirname($path)}) {
          push @deleted_gpath, $gpath;
      }
      $self->{empty}->{$path} = 0;
      return undef;
  }
  # Editor callback: build the file baton for an existing file.
  #
  # For non-ignored paths the current mode and blob id are looked up in
  # commit {c}; a missing entry is fatal.  A 100644 entry listed in
  # {empty_symlinks} is treated as mode 120000.  For ignored paths mode and
  # blob stay undefined.
  #
  # Returns a hash with path, mode_a, mode_b, blob, pool and action => 'M'.
  sub open_file {
      my ($self, $path, $pb, $rev) = @_;
      my ($mode, $blob);

      unless ($self->is_path_ignored($path)) {
          my $gpath = $self->git_path($path);
          my $listing = command('ls-tree', '-z', $self->{c}, "./$gpath");
          ($mode, $blob) =
              $listing =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/;
          if (!defined($mode) || !defined($blob)) {
              die "$path was not found in commit $self->{c} (r$rev)\n";
          }
          $mode = '120000'
              if $mode eq '100644' && $self->{empty_symlinks}->{$path};
      }

      return +{ path => $path, mode_a => $mode, mode_b => $mode,
                blob => $blob, pool => SVN::Pool->new, action => 'M' };
  }
  # Editor callback: build the file baton for a newly added file.
  #
  # For non-ignored paths the parent directory stops being tracked in
  # {empty}, the mode starts out as 100644, and any placeholder recorded
  # for the parent directory is forgotten (and deleted from the index,
  # unless the new file is that placeholder itself).
  #
  # Returns a hash with path, mode_a, mode_b, pool and action => 'A'.
  sub add_file {
      my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
      my $mode;

      unless ($self->is_path_ignored($path)) {
          my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
          delete $self->{empty}->{$dir};
          $mode = '100644';

          if (my $placeholder = $added_placeholder{$dir}) {
              # Remove our placeholder file, if we created one.
              delete_entry($self, $placeholder)
                  if $path ne $placeholder;
              delete $added_placeholder{$dir};
          }
      }

      return +{ path => $path, mode_a => $mode, mode_b => $mode,
                pool => SVN::Pool->new, action => 'A' };
  }
  # Editor callback: register a newly added directory.
  #
  # When the directory maps to the empty git path, every entry of commit
  # {c} is first removed from the index.  The directory is then marked
  # empty (1) in {empty}, its parent is unmarked, and a placeholder
  # recorded for the parent is deleted.  Ignored paths skip all of this.
  #
  # Returns the directory baton, a hash holding only the path.
  sub add_directory {
      my ($self, $path, $cp_path, $cp_rev) = @_;
      return +{ path => $path } if $self->is_path_ignored($path);

      my $gpath = $self->git_path($path);
      if ($gpath eq '') {
          my ($ls, $ctx) = command_output_pipe(qw/ls-tree
                                                  -r --name-only -z/,
                                               $self->{c});
          local $/ = "\0";
          while (<$ls>) {
              chomp;
              $self->{gii}->remove($_);
              print "\tD\t$_\n" unless $::_q;
              # NOTE(review): this records $gpath (always '' here), not
              # the removed entry $_ -- confirm that is intended.
              push @deleted_gpath, $gpath;
          }
          command_close_pipe($ls, $ctx);
          $self->{empty}->{$path} = 0;
      }

      my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
      delete $self->{empty}->{$dir};
      $self->{empty}->{$path} = 1;

      if (my $placeholder = $added_placeholder{$dir}) {
          # Remove our placeholder file, if we created one.
          delete_entry($self, $placeholder);
          delete $added_placeholder{$dir};
      }

      return +{ path => $path };
  }
  # Editor callback: record a directory property change in
  # {dir_prop}{<dir path>}{<property>}.  Ignored paths are skipped.
  # Always returns undef.
  sub change_dir_prop {
      my ($self, $db, $prop, $value) = @_;
      my $dir = $db->{path};
      return undef if $self->is_path_ignored($dir);

      # The inner hash springs into existence on first use.
      $self->{dir_prop}->{$dir}->{$prop} = $value;
      return undef;
  }
  # Editor callback: note a directory SVN reported as absent.  The path is
  # appended to the list kept in {absent_dir} under the parent baton's path.
  # Ignored paths are skipped.  Always returns undef.
  sub absent_directory {
      my ($self, $path, $pb) = @_;
      return undef if $self->is_path_ignored($path);

      # The list is created on first push.
      push @{ $self->{absent_dir}->{ $pb->{path} } }, $path;
      return undef;
  }
  # Editor callback: note a file SVN reported as absent.  The path is
  # appended to the list kept in {absent_file} under the parent baton's
  # path.  Ignored paths are skipped.  Always returns undef.
  sub absent_file {
      my ($self, $path, $pb) = @_;
      return undef if $self->is_path_ignored($path);

      # The list is created on first push.
      push @{ $self->{absent_file}->{ $pb->{path} } }, $path;
      return undef;
  }
  # Editor callback: apply a file property change.
  #
  #   svn:special    - sets mode_b to 120000 when set, 100644 when removed
  #   svn:executable - sets mode_b to 100755 when set, 100644 when removed,
  #                    unless mode_b is already 120000
  #   anything else  - stored in {file_prop}{<file path>}{<property>}
  #
  # Ignored paths are skipped.  Always returns undef.
  sub change_file_prop {
      my ($self, $fb, $prop, $value) = @_;
      return undef if $self->is_path_ignored($fb->{path});

      if ($prop eq 'svn:special') {
          $fb->{mode_b} = defined $value ? 120000 : 100644;
      } elsif ($prop eq 'svn:executable') {
          unless ($fb->{mode_b} == 120000) {
              $fb->{mode_b} = defined $value ? 100755 : 100644;
          }
      } else {
          # The inner hash springs into existence on first use.
          $self->{file_prop}->{ $fb->{path} }->{$prop} = $value;
      }
      return undef;
  }
  # Editor callback: prepare the base and target temp files for a delta.
  #
  # When the baton carries a blob, its content is written to the base file
  # (prefixed with "link " for a 120000 base that is not a known empty
  # symlink) and, if $exp is defined, checked against that MD5.  On a
  # mismatch with the "link " prefix, the base is rebuilt once without the
  # prefix; any other mismatch is fatal.
  #
  # Stores the handles in $fb->{fh} and $fb->{base}.  Returns an array
  # reference wrapping the result of SVN::TxDelta::apply(), or undef for
  # ignored paths.
  sub apply_textdelta {
      my ($self, $fb, $exp) = @_;
      return undef if $self->is_path_ignored($fb->{path});

      my $fh = $::_repository->temp_acquire('svn_delta');
      # $fh gets auto-closed() by SVN::TxDelta::apply(),
      # (but $base does not,) so dup() it for reading in close_file
      open my $dup, '<&', $fh or croak $!;
      my $base = $::_repository->temp_acquire('git_blob');

      if ($fb->{blob}) {
          my $base_is_link;
          if ($fb->{mode_a} eq '120000' &&
              ! $self->{empty_symlinks}->{$fb->{path}}) {
              print $base 'link ' or die "print $!\n";
              $base_is_link = 1;
          }

          # At most two passes: the second one only after a failed
          # checksum on a "link "-prefixed base.
          while (1) {
              my $size = $::_repository->cat_blob($fb->{blob}, $base);
              die "Failed to read object $fb->{blob}" if ($size < 0);
              last unless defined $exp;

              seek $base, 0, 0 or croak $!;
              my $got = ::md5sum($base);
              last if $got eq $exp;

              my $err = "Checksum mismatch: ".
                        "$fb->{path} $fb->{blob}\n" .
                        "expected: $exp\n" .
                        " got: $got\n";
              die $err unless $base_is_link;

              warn $err,
                   "Retrying... (possibly ",
                   "a bad symlink from SVN)\n";
              $::_repository->temp_reset($base);
              $base_is_link = 0;
          }
      }

      seek $base, 0, 0 or croak $!;
      $fb->{fh} = $fh;
      $fb->{base} = $base;
      return [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path},
                                   $fb->{pool}) ];
  }
  # Editor callback: finish a file and record it in the index.
  #
  # When a delta was applied ($fb->{fh} is set), the result is checked
  # against $exp (if defined), symlink content has its "link " prefix
  # removed, and the content is hashed into the object store.  Without a
  # delta the existing $fb->{blob} is reused; having neither is fatal.
  #
  # The entry is then written through {gii} with mode_b and reported on
  # stdout unless $::_q is set.  Ignored paths are skipped.
  # Always returns undef.
  sub close_file {
      my ($self, $fb, $exp) = @_;
      return undef if $self->is_path_ignored($fb->{path});

      my $hash;
      my $path = $self->git_path($fb->{path});
      if (my $fh = $fb->{fh}) {
          if (defined $exp) {
              seek($fh, 0, 0) or croak $!;
              my $got = ::md5sum($fh);
              die "Checksum mismatch: $path\n",
                  "expected: $exp\n got: $got\n"
                  if $got ne $exp;
          }

          if ($fb->{mode_b} == 120000) {
              sysseek($fh, 0, 0) or croak $!;
              my $rd = sysread($fh, my $buf, 5);
              croak "sysread: $!\n" unless defined $rd;

              if ($rd == 0) {
                  warn "$path has mode 120000",
                       " but it points to nothing\n",
                       "converting to an empty file with mode",
                       " 100644\n";
                  $fb->{mode_b} = '100644';
              } elsif ($buf ne 'link ') {
                  warn "$path has mode 120000",
                       " but is not a link\n";
              } else {
                  # Copy everything after the "link " prefix into a
                  # second temp file and hash that one instead.
                  my $tmp_fh = $::_repository->temp_acquire(
                      'svn_hash');
                  my $res;
                  while ($res = sysread($fh, my $str, 1024)) {
                      my $out = syswrite($tmp_fh, $str, $res);
                      croak("write ",
                            Git::temp_path($tmp_fh),
                            ": $!\n")
                          unless defined($out) && $out == $res;
                  }
                  defined $res or croak $!;

                  # After the swap $fh is the stripped copy and $tmp_fh
                  # the original delta output, which is released here.
                  ($fh, $tmp_fh) = ($tmp_fh, $fh);
                  Git::temp_release($tmp_fh, 1);
              }
          }

          $hash = $::_repository->hash_and_insert_object(
              Git::temp_path($fh));
          $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";

          Git::temp_release($fb->{base}, 1);
          Git::temp_release($fh, 1);
      } else {
          $hash = $fb->{blob} or die "no blob information\n";
      }

      $fb->{pool}->clear;
      $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
      print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
      return undef;
  }
  # Editor callback: the edit was aborted.  Copies the {nr} field of the
  # {gii} object to $self->{nr}, drops {gii}, then defers to the parent
  # class with the remaining arguments.
  sub abort_edit {
      my $self = shift;
      my $gii = delete $self->{gii};
      $self->{nr} = $gii->{nr};
      undef $gii;    # let go of the object before calling the parent
      return $self->SUPER::abort_edit(@_);
  }
  # Editor callback: the edit completed.
  #
  # With $_preserve_empty_dirs set, a placeholder file is added to every
  # directory found to be empty and the placeholder list is saved to git
  # config.  Afterwards {git_commit_ok} is set, the {nr} field of {gii} is
  # copied to $self->{nr}, {gii} is dropped and the parent class is called.
  sub close_edit {
      my $self = shift;

      if ($_preserve_empty_dirs) {
          # Any entry flagged as empty that also has an associated
          # dir_prop represents a newly created empty directory.
          my @empty_dirs =
              grep { exists $self->{dir_prop}->{$_} }
              keys %{$self->{empty}};

          # Search for directories that have become empty due to
          # subsequent file deletes.
          push @empty_dirs, $self->find_empty_directories();

          # Finally, add a placeholder file to each empty directory.
          foreach my $dir (@empty_dirs) {
              $self->add_placeholder_file($dir);
          }

          $self->stash_placeholder_list();
      }

      $self->{git_commit_ok} = 1;
      my $gii = delete $self->{gii};
      $self->{nr} = $gii->{nr};
      undef $gii;    # let go of the object before calling the parent
      return $self->SUPER::close_edit(@_);
  }
  # Find directories emptied by the deletions recorded in @deleted_gpath.
  #
  # Each parent directory of a deleted path (except ".") is listed in
  # commit {c}; it is reported when every listed name was deleted.
  #
  # Returns the list of such directories.
  sub find_empty_directories {
      my ($self) = @_;
      my @empty_dirs;
      my %dirs = map { dirname($_) => 1 } @deleted_gpath;

      DIR:
      foreach my $dir (sort keys %dirs) {
          next DIR if $dir eq ".";

          # If there have been any additions to this directory, there is
          # no reason to check if it is empty.
          # NOTE(review): the test below never looks at $dir; it fires
          # whenever any recorded prop path has its parent recorded in the
          # same hash -- confirm this matches the intent stated above.
          foreach my $t (qw/dir_prop file_prop/) {
              foreach my $path (keys %{ $self->{$t} }) {
                  next DIR
                      if exists $self->{$t}->{dirname($path)};
              }
          }

          # Use `git ls-tree` to get the filenames of this directory
          # that existed prior to this particular commit.
          my $ls = command('ls-tree', '-z', '--name-only',
                           $self->{c}, "$dir/");
          my %files = map { $_ => 1 } split(/\0/, $ls);

          # Remove the filenames that were deleted during this commit.
          delete @files{@deleted_gpath};

          # Report the directory if there are no filenames left.
          push @empty_dirs, $dir if !%files;
      }

      return @empty_dirs;
  }
  # Add an empty placeholder file named $_placeholder_filename to $dir.
  #
  # The content of a freshly acquired temp file is hashed into the object
  # store and written to the index as a 100644 entry.  $dir is dropped
  # from {empty} and the placeholder path is recorded in
  # %added_placeholder.
  sub add_placeholder_file {
      my ($self, $dir) = @_;
      my $path  = "$dir/$_placeholder_filename";
      my $gpath = $self->git_path($path);

      my $fh       = $::_repository->temp_acquire($gpath);
      my $tmp_path = Git::temp_path($fh);
      my $hash     = $::_repository->hash_and_insert_object($tmp_path);
      Git::temp_release($fh, 1);
      $self->{gii}->update('100644', $hash, $gpath) or croak $!;

      # The directory should no longer be considered empty.
      delete $self->{empty}->{$dir};

      # Keep track of any placeholder files we create.
      return $added_placeholder{$dir} = $path;
  }
  # Save %added_placeholder to git config under
  # svn-remote.<repo_id>.added-placeholder: existing values are unset
  # first, then every placeholder path is added back.
  sub stash_placeholder_list {
      my ($self) = @_;
      my $k = "svn-remote.$repo_id.added-placeholder";

      # Only unset when something is stored; the lookup runs inside eval
      # so that a failing command_oneline() is not fatal.
      my $v = eval { command_oneline('config', '--get-all', $k) };
      if ($v) {
          command_noisy('config', '--unset-all', $k);
      }

      foreach my $placeholder (values %added_placeholder) {
          command_noisy('config', '--add', $k, $placeholder);
      }
  }
  451. 1;
  452. __END__
  453. Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
  454. =head1 SYNOPSIS
  455. use SVN::Core;
  456. use SVN::Ra;
  457. use Git::SVN;
  458. use Git::SVN::Fetcher;
  459. use Git;
  460. my $gs = Git::SVN->find_by_url($url);
  461. my $ra = SVN::Ra->new(url => $url);
  462. my $editor = Git::SVN::Fetcher->new($gs);
  463. my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
  464. 1, $editor);
  465. $reporter->set_path('', $old_rev, 0);
  466. $reporter->finish_report;
  467. my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
  468. foreach my $path (keys %{$editor->{dir_prop}}) {
  469. my $props = $editor->{dir_prop}{$path};
  470. foreach my $prop (keys %$props) {
  471. print "property $prop at $path changed to $props->{$prop}\n";
  472. }
  473. }
  474. foreach my $path (keys %{$editor->{empty}}) {
  475. my $action = $editor->{empty}{$path} ? 'added' : 'removed';
  476. print "empty directory $path $action\n";
  477. }
  478. foreach my $path (keys %{$editor->{file_prop}}) { ... }
  479. foreach my $parent (keys %{$editor->{absent_dir}}) {
  480. my @children = @{$editor->{absent_dir}{$parent}};
  481. print "cannot fetch directory $parent/$_: not authorized?\n"
  482. foreach @children;
  483. }
  484. foreach my $parent (keys %{$editor->{absent_file}}) { ... }
  485. =head1 DESCRIPTION
  486. This is a subclass of C<SVN::Delta::Editor>, which means it implements
  487. callbacks to act as a consumer of Subversion tree deltas. This
  488. particular implementation of those callbacks is meant to store
  489. information about the resulting content which B<git svn fetch> could
  490. use to populate new commits and new entries for F<unhandled.log>.
  491. More specifically:
  492. =over
  493. =item * Additions, removals, and modifications of files are propagated
  494. to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
  495. B<git update-index>.
  496. =item * Changes in Subversion path properties are recorded in the
  497. C<dir_prop> and C<file_prop> fields (which are hashes).
  498. =item * Addition and removal of empty directories are indicated by
  499. entries with value 1 and 0 respectively in the C<empty> hash.
  500. =item * Paths that are present but cannot be conveyed (presumably due
  501. to permissions) are recorded in the C<absent_file> and
  502. C<absent_dir> hashes. For each key, the corresponding value is
  503. a list of paths under that directory that were present but
  504. could not be conveyed.
  505. =back
  506. The interface is unstable. Do not use this module unless you are
  507. developing git-svn.
  508. =head1 DEPENDENCIES
  509. L<SVN::Delta> from the Subversion perl bindings,
  510. the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
  511. and git's L<Git> helper module.
  512. C<Git::SVN::Fetcher> has not been tested using callers other than
  513. B<git-svn> itself.
  514. =head1 SEE ALSO
  515. L<SVN::Delta>,
  516. L<Git::SVN::Editor>.
  517. =head1 INCOMPATIBILITIES
  518. None reported.
  519. =head1 BUGS
  520. None.