PageRenderTime 82ms CodeModel.GetById 49ms RepoModel.GetById 2ms app.codeStats 0ms

/git-1.7.11.2/perl/Git/SVN/Fetcher.pm

#
Perl | 603 lines | 495 code | 75 blank | 33 comment | 75 complexity | cad991871946d4a0f1370c0cd85bb722 MD5 | raw file
Possible License(s): Apache-2.0, BSD-2-Clause, GPL-2.0, LGPL-2.1
  1. package Git::SVN::Fetcher;
  2. use vars qw/@ISA $_ignore_regex $_preserve_empty_dirs $_placeholder_filename
  3. @deleted_gpath %added_placeholder $repo_id/;
  4. use strict;
  5. use warnings;
  6. use SVN::Delta;
  7. use Carp qw/croak/;
  8. use File::Basename qw/dirname/;
  9. use IO::File qw//;
  10. use Git qw/command command_oneline command_noisy command_output_pipe
  11. command_input_pipe command_close_pipe
  12. command_bidi_pipe command_close_bidi_pipe/;
  13. BEGIN {
  14. @ISA = qw(SVN::Delta::Editor);
  15. }
  16. # file baton members: path, mode_a, mode_b, pool, fh, blob, base
  17. sub new {
  18. my ($class, $git_svn, $switch_path) = @_;
  19. my $self = SVN::Delta::Editor->new;
  20. bless $self, $class;
  21. if (exists $git_svn->{last_commit}) {
  22. $self->{c} = $git_svn->{last_commit};
  23. $self->{empty_symlinks} =
  24. _mark_empty_symlinks($git_svn, $switch_path);
  25. }
  26. # some options are read globally, but can be overridden locally
  27. # per [svn-remote "..."] section. Command-line options will *NOT*
  28. # override options set in an [svn-remote "..."] section
  29. $repo_id = $git_svn->{repo_id};
  30. my $k = "svn-remote.$repo_id.ignore-paths";
  31. my $v = eval { command_oneline('config', '--get', $k) };
  32. $self->{ignore_regex} = $v;
  33. $k = "svn-remote.$repo_id.preserve-empty-dirs";
  34. $v = eval { command_oneline('config', '--get', '--bool', $k) };
  35. if ($v && $v eq 'true') {
  36. $_preserve_empty_dirs = 1;
  37. $k = "svn-remote.$repo_id.placeholder-filename";
  38. $v = eval { command_oneline('config', '--get', $k) };
  39. $_placeholder_filename = $v;
  40. }
  41. # Load the list of placeholder files added during previous invocations.
  42. $k = "svn-remote.$repo_id.added-placeholder";
  43. $v = eval { command_oneline('config', '--get-all', $k) };
  44. if ($_preserve_empty_dirs && $v) {
  45. # command() prints errors to stderr, so we only call it if
  46. # command_oneline() succeeded.
  47. my @v = command('config', '--get-all', $k);
  48. $added_placeholder{ dirname($_) } = $_ foreach @v;
  49. }
  50. $self->{empty} = {};
  51. $self->{dir_prop} = {};
  52. $self->{file_prop} = {};
  53. $self->{absent_dir} = {};
  54. $self->{absent_file} = {};
  55. $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
  56. $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
  57. $self;
  58. }
  59. # this uses the Ra object, so it must be called before do_{switch,update},
  60. # not inside them (when the Git::SVN::Fetcher object is passed) to
  61. # do_{switch,update}
  62. sub _mark_empty_symlinks {
  63. my ($git_svn, $switch_path) = @_;
  64. my $bool = Git::config_bool('svn.brokenSymlinkWorkaround');
  65. return {} if (!defined($bool)) || (defined($bool) && ! $bool);
  66. my %ret;
  67. my ($rev, $cmt) = $git_svn->last_rev_commit;
  68. return {} unless ($rev && $cmt);
  69. # allow the warning to be printed for each revision we fetch to
  70. # ensure the user sees it. The user can also disable the workaround
  71. # on the repository even while git svn is running and the next
  72. # revision fetched will skip this expensive function.
  73. my $printed_warning;
  74. chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
  75. my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
  76. local $/ = "\0";
  77. my $pfx = defined($switch_path) ? $switch_path : $git_svn->{path};
  78. $pfx .= '/' if length($pfx);
  79. while (<$ls>) {
  80. chomp;
  81. s/\A100644 blob $empty_blob\t//o or next;
  82. unless ($printed_warning) {
  83. print STDERR "Scanning for empty symlinks, ",
  84. "this may take a while if you have ",
  85. "many empty files\n",
  86. "You may disable this with `",
  87. "git config svn.brokenSymlinkWorkaround ",
  88. "false'.\n",
  89. "This may be done in a different ",
  90. "terminal without restarting ",
  91. "git svn\n";
  92. $printed_warning = 1;
  93. }
  94. my $path = $_;
  95. my (undef, $props) =
  96. $git_svn->ra->get_file($pfx.$path, $rev, undef);
  97. if ($props->{'svn:special'}) {
  98. $ret{$path} = 1;
  99. }
  100. }
  101. command_close_pipe($ls, $ctx);
  102. \%ret;
  103. }
  104. # returns true if a given path is inside a ".git" directory
  105. sub in_dot_git {
  106. $_[0] =~ m{(?:^|/)\.git(?:/|$)};
  107. }
  108. # return value: 0 -- don't ignore, 1 -- ignore
  109. sub is_path_ignored {
  110. my ($self, $path) = @_;
  111. return 1 if in_dot_git($path);
  112. return 1 if defined($self->{ignore_regex}) &&
  113. $path =~ m!$self->{ignore_regex}!;
  114. return 0 unless defined($_ignore_regex);
  115. return 1 if $path =~ m!$_ignore_regex!o;
  116. return 0;
  117. }
  118. sub set_path_strip {
  119. my ($self, $path) = @_;
  120. $self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
  121. }
  122. sub open_root {
  123. { path => '' };
  124. }
  125. sub open_directory {
  126. my ($self, $path, $pb, $rev) = @_;
  127. { path => $path };
  128. }
  129. sub git_path {
  130. my ($self, $path) = @_;
  131. if (my $enc = $self->{pathnameencoding}) {
  132. require Encode;
  133. Encode::from_to($path, 'UTF-8', $enc);
  134. }
  135. if ($self->{path_strip}) {
  136. $path =~ s!$self->{path_strip}!! or
  137. die "Failed to strip path '$path' ($self->{path_strip})\n";
  138. }
  139. $path;
  140. }
  141. sub delete_entry {
  142. my ($self, $path, $rev, $pb) = @_;
  143. return undef if $self->is_path_ignored($path);
  144. my $gpath = $self->git_path($path);
  145. return undef if ($gpath eq '');
  146. # remove entire directories.
  147. my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
  148. =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
  149. if ($tree) {
  150. my ($ls, $ctx) = command_output_pipe(qw/ls-tree
  151. -r --name-only -z/,
  152. $tree);
  153. local $/ = "\0";
  154. while (<$ls>) {
  155. chomp;
  156. my $rmpath = "$gpath/$_";
  157. $self->{gii}->remove($rmpath);
  158. print "\tD\t$rmpath\n" unless $::_q;
  159. }
  160. print "\tD\t$gpath/\n" unless $::_q;
  161. command_close_pipe($ls, $ctx);
  162. } else {
  163. $self->{gii}->remove($gpath);
  164. print "\tD\t$gpath\n" unless $::_q;
  165. }
  166. # Don't add to @deleted_gpath if we're deleting a placeholder file.
  167. push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
  168. $self->{empty}->{$path} = 0;
  169. undef;
  170. }
  171. sub open_file {
  172. my ($self, $path, $pb, $rev) = @_;
  173. my ($mode, $blob);
  174. goto out if $self->is_path_ignored($path);
  175. my $gpath = $self->git_path($path);
  176. ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
  177. =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
  178. unless (defined $mode && defined $blob) {
  179. die "$path was not found in commit $self->{c} (r$rev)\n";
  180. }
  181. if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
  182. $mode = '120000';
  183. }
  184. out:
  185. { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob,
  186. pool => SVN::Pool->new, action => 'M' };
  187. }
  188. sub add_file {
  189. my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
  190. my $mode;
  191. if (!$self->is_path_ignored($path)) {
  192. my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
  193. delete $self->{empty}->{$dir};
  194. $mode = '100644';
  195. if ($added_placeholder{$dir}) {
  196. # Remove our placeholder file, if we created one.
  197. delete_entry($self, $added_placeholder{$dir})
  198. unless $path eq $added_placeholder{$dir};
  199. delete $added_placeholder{$dir}
  200. }
  201. }
  202. { path => $path, mode_a => $mode, mode_b => $mode,
  203. pool => SVN::Pool->new, action => 'A' };
  204. }
  205. sub add_directory {
  206. my ($self, $path, $cp_path, $cp_rev) = @_;
  207. goto out if $self->is_path_ignored($path);
  208. my $gpath = $self->git_path($path);
  209. if ($gpath eq '') {
  210. my ($ls, $ctx) = command_output_pipe(qw/ls-tree
  211. -r --name-only -z/,
  212. $self->{c});
  213. local $/ = "\0";
  214. while (<$ls>) {
  215. chomp;
  216. $self->{gii}->remove($_);
  217. print "\tD\t$_\n" unless $::_q;
  218. push @deleted_gpath, $gpath;
  219. }
  220. command_close_pipe($ls, $ctx);
  221. $self->{empty}->{$path} = 0;
  222. }
  223. my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
  224. delete $self->{empty}->{$dir};
  225. $self->{empty}->{$path} = 1;
  226. if ($added_placeholder{$dir}) {
  227. # Remove our placeholder file, if we created one.
  228. delete_entry($self, $added_placeholder{$dir});
  229. delete $added_placeholder{$dir}
  230. }
  231. out:
  232. { path => $path };
  233. }
  234. sub change_dir_prop {
  235. my ($self, $db, $prop, $value) = @_;
  236. return undef if $self->is_path_ignored($db->{path});
  237. $self->{dir_prop}->{$db->{path}} ||= {};
  238. $self->{dir_prop}->{$db->{path}}->{$prop} = $value;
  239. undef;
  240. }
  241. sub absent_directory {
  242. my ($self, $path, $pb) = @_;
  243. return undef if $self->is_path_ignored($path);
  244. $self->{absent_dir}->{$pb->{path}} ||= [];
  245. push @{$self->{absent_dir}->{$pb->{path}}}, $path;
  246. undef;
  247. }
  248. sub absent_file {
  249. my ($self, $path, $pb) = @_;
  250. return undef if $self->is_path_ignored($path);
  251. $self->{absent_file}->{$pb->{path}} ||= [];
  252. push @{$self->{absent_file}->{$pb->{path}}}, $path;
  253. undef;
  254. }
  255. sub change_file_prop {
  256. my ($self, $fb, $prop, $value) = @_;
  257. return undef if $self->is_path_ignored($fb->{path});
  258. if ($prop eq 'svn:executable') {
  259. if ($fb->{mode_b} != 120000) {
  260. $fb->{mode_b} = defined $value ? 100755 : 100644;
  261. }
  262. } elsif ($prop eq 'svn:special') {
  263. $fb->{mode_b} = defined $value ? 120000 : 100644;
  264. } else {
  265. $self->{file_prop}->{$fb->{path}} ||= {};
  266. $self->{file_prop}->{$fb->{path}}->{$prop} = $value;
  267. }
  268. undef;
  269. }
  270. sub apply_textdelta {
  271. my ($self, $fb, $exp) = @_;
  272. return undef if $self->is_path_ignored($fb->{path});
  273. my $fh = $::_repository->temp_acquire('svn_delta');
  274. # $fh gets auto-closed() by SVN::TxDelta::apply(),
  275. # (but $base does not,) so dup() it for reading in close_file
  276. open my $dup, '<&', $fh or croak $!;
  277. my $base = $::_repository->temp_acquire('git_blob');
  278. if ($fb->{blob}) {
  279. my ($base_is_link, $size);
  280. if ($fb->{mode_a} eq '120000' &&
  281. ! $self->{empty_symlinks}->{$fb->{path}}) {
  282. print $base 'link ' or die "print $!\n";
  283. $base_is_link = 1;
  284. }
  285. retry:
  286. $size = $::_repository->cat_blob($fb->{blob}, $base);
  287. die "Failed to read object $fb->{blob}" if ($size < 0);
  288. if (defined $exp) {
  289. seek $base, 0, 0 or croak $!;
  290. my $got = ::md5sum($base);
  291. if ($got ne $exp) {
  292. my $err = "Checksum mismatch: ".
  293. "$fb->{path} $fb->{blob}\n" .
  294. "expected: $exp\n" .
  295. " got: $got\n";
  296. if ($base_is_link) {
  297. warn $err,
  298. "Retrying... (possibly ",
  299. "a bad symlink from SVN)\n";
  300. $::_repository->temp_reset($base);
  301. $base_is_link = 0;
  302. goto retry;
  303. }
  304. die $err;
  305. }
  306. }
  307. }
  308. seek $base, 0, 0 or croak $!;
  309. $fb->{fh} = $fh;
  310. $fb->{base} = $base;
  311. [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
  312. }
  313. sub close_file {
  314. my ($self, $fb, $exp) = @_;
  315. return undef if $self->is_path_ignored($fb->{path});
  316. my $hash;
  317. my $path = $self->git_path($fb->{path});
  318. if (my $fh = $fb->{fh}) {
  319. if (defined $exp) {
  320. seek($fh, 0, 0) or croak $!;
  321. my $got = ::md5sum($fh);
  322. if ($got ne $exp) {
  323. die "Checksum mismatch: $path\n",
  324. "expected: $exp\n got: $got\n";
  325. }
  326. }
  327. if ($fb->{mode_b} == 120000) {
  328. sysseek($fh, 0, 0) or croak $!;
  329. my $rd = sysread($fh, my $buf, 5);
  330. if (!defined $rd) {
  331. croak "sysread: $!\n";
  332. } elsif ($rd == 0) {
  333. warn "$path has mode 120000",
  334. " but it points to nothing\n",
  335. "converting to an empty file with mode",
  336. " 100644\n";
  337. $fb->{mode_b} = '100644';
  338. } elsif ($buf ne 'link ') {
  339. warn "$path has mode 120000",
  340. " but is not a link\n";
  341. } else {
  342. my $tmp_fh = $::_repository->temp_acquire(
  343. 'svn_hash');
  344. my $res;
  345. while ($res = sysread($fh, my $str, 1024)) {
  346. my $out = syswrite($tmp_fh, $str, $res);
  347. defined($out) && $out == $res
  348. or croak("write ",
  349. Git::temp_path($tmp_fh),
  350. ": $!\n");
  351. }
  352. defined $res or croak $!;
  353. ($fh, $tmp_fh) = ($tmp_fh, $fh);
  354. Git::temp_release($tmp_fh, 1);
  355. }
  356. }
  357. $hash = $::_repository->hash_and_insert_object(
  358. Git::temp_path($fh));
  359. $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
  360. Git::temp_release($fb->{base}, 1);
  361. Git::temp_release($fh, 1);
  362. } else {
  363. $hash = $fb->{blob} or die "no blob information\n";
  364. }
  365. $fb->{pool}->clear;
  366. $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
  367. print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
  368. undef;
  369. }
  370. sub abort_edit {
  371. my $self = shift;
  372. $self->{nr} = $self->{gii}->{nr};
  373. delete $self->{gii};
  374. $self->SUPER::abort_edit(@_);
  375. }
  376. sub close_edit {
  377. my $self = shift;
  378. if ($_preserve_empty_dirs) {
  379. my @empty_dirs;
  380. # Any entry flagged as empty that also has an associated
  381. # dir_prop represents a newly created empty directory.
  382. foreach my $i (keys %{$self->{empty}}) {
  383. push @empty_dirs, $i if exists $self->{dir_prop}->{$i};
  384. }
  385. # Search for directories that have become empty due subsequent
  386. # file deletes.
  387. push @empty_dirs, $self->find_empty_directories();
  388. # Finally, add a placeholder file to each empty directory.
  389. $self->add_placeholder_file($_) foreach (@empty_dirs);
  390. $self->stash_placeholder_list();
  391. }
  392. $self->{git_commit_ok} = 1;
  393. $self->{nr} = $self->{gii}->{nr};
  394. delete $self->{gii};
  395. $self->SUPER::close_edit(@_);
  396. }
  397. sub find_empty_directories {
  398. my ($self) = @_;
  399. my @empty_dirs;
  400. my %dirs = map { dirname($_) => 1 } @deleted_gpath;
  401. foreach my $dir (sort keys %dirs) {
  402. next if $dir eq ".";
  403. # If there have been any additions to this directory, there is
  404. # no reason to check if it is empty.
  405. my $skip_added = 0;
  406. foreach my $t (qw/dir_prop file_prop/) {
  407. foreach my $path (keys %{ $self->{$t} }) {
  408. if (exists $self->{$t}->{dirname($path)}) {
  409. $skip_added = 1;
  410. last;
  411. }
  412. }
  413. last if $skip_added;
  414. }
  415. next if $skip_added;
  416. # Use `git ls-tree` to get the filenames of this directory
  417. # that existed prior to this particular commit.
  418. my $ls = command('ls-tree', '-z', '--name-only',
  419. $self->{c}, "$dir/");
  420. my %files = map { $_ => 1 } split(/\0/, $ls);
  421. # Remove the filenames that were deleted during this commit.
  422. delete $files{$_} foreach (@deleted_gpath);
  423. # Report the directory if there are no filenames left.
  424. push @empty_dirs, $dir unless (scalar %files);
  425. }
  426. @empty_dirs;
  427. }
  428. sub add_placeholder_file {
  429. my ($self, $dir) = @_;
  430. my $path = "$dir/$_placeholder_filename";
  431. my $gpath = $self->git_path($path);
  432. my $fh = $::_repository->temp_acquire($gpath);
  433. my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh));
  434. Git::temp_release($fh, 1);
  435. $self->{gii}->update('100644', $hash, $gpath) or croak $!;
  436. # The directory should no longer be considered empty.
  437. delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir};
  438. # Keep track of any placeholder files we create.
  439. $added_placeholder{$dir} = $path;
  440. }
  441. sub stash_placeholder_list {
  442. my ($self) = @_;
  443. my $k = "svn-remote.$repo_id.added-placeholder";
  444. my $v = eval { command_oneline('config', '--get-all', $k) };
  445. command_noisy('config', '--unset-all', $k) if $v;
  446. foreach (values %added_placeholder) {
  447. command_noisy('config', '--add', $k, $_);
  448. }
  449. }
  450. 1;
  451. __END__
  452. Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
  453. =head1 SYNOPSIS
  454. use SVN::Core;
  455. use SVN::Ra;
  456. use Git::SVN;
  457. use Git::SVN::Fetcher;
  458. use Git;
  459. my $gs = Git::SVN->find_by_url($url);
  460. my $ra = SVN::Ra->new(url => $url);
  461. my $editor = Git::SVN::Fetcher->new($gs);
  462. my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
  463. 1, $editor);
  464. $reporter->set_path('', $old_rev, 0);
  465. $reporter->finish_report;
  466. my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
  467. foreach my $path (keys %{$editor->{dir_prop}}) {
  468. my $props = $editor->{dir_prop}{$path};
  469. foreach my $prop (keys %$props) {
  470. print "property $prop at $path changed to $props->{$prop}\n";
  471. }
  472. }
  473. foreach my $path (keys %{$editor->{empty}}) {
  474. my $action = $editor->{empty}{$path} ? 'added' : 'removed';
  475. print "empty directory $path $action\n";
  476. }
  477. foreach my $path (keys %{$editor->{file_prop}}) { ... }
  478. foreach my $parent (keys %{$editor->{absent_dir}}) {
  479. my @children = @{$editor->{absent_dir}{$parent}};
  480. print "cannot fetch directory $parent/$_: not authorized?\n"
  481. foreach @children;
  482. }
  483. foreach my $parent (keys %{$editor->{absent_file}}) { ... }
  484. =head1 DESCRIPTION
  485. This is a subclass of C<SVN::Delta::Editor>, which means it implements
  486. callbacks to act as a consumer of Subversion tree deltas. This
  487. particular implementation of those callbacks is meant to store
  488. information about the resulting content which B<git svn fetch> could
  489. use to populate new commits and new entries for F<unhandled.log>.
  490. More specifically:
  491. =over
  492. =item * Additions, removals, and modifications of files are propagated
  493. to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
  494. B<git update-index>.
  495. =item * Changes in Subversion path properties are recorded in the
  496. C<dir_prop> and C<file_prop> fields (which are hashes).
  497. =item * Addition and removal of empty directories are indicated by
  498. entries with value 1 and 0 respectively in the C<empty> hash.
  499. =item * Paths that are present but cannot be conveyed (presumably due
  500. to permissions) are recorded in the C<absent_file> and
  501. C<absent_dir> hashes. For each key, the corresponding value is
  502. a list of paths under that directory that were present but
  503. could not be conveyed.
  504. =back
  505. The interface is unstable. Do not use this module unless you are
  506. developing git-svn.
  507. =head1 DEPENDENCIES
  508. L<SVN::Delta> from the Subversion perl bindings,
  509. the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
  510. and git's L<Git> helper module.
  511. C<Git::SVN::Fetcher> has not been tested using callers other than
  512. B<git-svn> itself.
  513. =head1 SEE ALSO
  514. L<SVN::Delta>,
  515. L<Git::SVN::Editor>.
  516. =head1 INCOMPATIBILITIES
  517. None reported.
  518. =head1 BUGS
  519. None.