mirror of
https://github.com/Perl/perl5.git
synced 2026-01-26 08:38:23 +00:00
If someone configures with -Dmksymlinks to a git checkout t/porting/authors.t gets confused, and fails test. This teaches it and Porting/updateAUTHORS.p[lm] to handle this build scenario properly. Fixes #21272
1414 lines
43 KiB
Perl
1414 lines
43 KiB
Perl
package Porting::updateAUTHORS;
|
|
use strict;
|
|
use warnings;
|
|
use Data::Dumper;
|
|
use Encode qw(encode_utf8 decode_utf8 decode);
|
|
use Digest::SHA qw(sha256_base64);
|
|
use Text::Wrap qw(wrap);
|
|
use Unicode::Collate;
|
|
use Cwd qw(getcwd);
|
|
use feature 'fc';
|
|
$Text::Wrap::columns= 80;
|
|
|
|
# The style of this file is determined by:
|
|
#
|
|
# perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \
|
|
# -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \
|
|
# -fsb='#start-no-tidy' -fse='#end-no-tidy'
|
|
|
|
# Info and config for passing to git log.
|
|
# %an: author name
|
|
# %aN: author name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
|
|
# %ae: author email
|
|
# %aE: author email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
|
|
# %cn: committer name
|
|
# %cN: committer name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
|
|
# %ce: committer email
|
|
# %cE: committer email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
|
|
# %H: commit hash
|
|
# %h: abbreviated commit hash
|
|
# %s: subject
|
|
# %x00: print a byte from a hex code
|
|
|
|
my %field_spec= (
|
|
"an" => "author_name",
|
|
"aN" => "author_name_mm",
|
|
"ae" => "author_email",
|
|
"aE" => "author_email_mm",
|
|
"cn" => "committer_name",
|
|
"cN" => "committer_name_mm",
|
|
"ce" => "committer_email",
|
|
"cE" => "committer_email_mm",
|
|
"H" => "commit_hash",
|
|
"h" => "abbrev_hash",
|
|
"s" => "commit_subject",
|
|
);
|
|
|
|
my $Collate= Unicode::Collate->new(level => 1, indentical => 1);
|
|
my @field_codes= sort keys %field_spec;
|
|
my @field_names= map { $field_spec{$_} } @field_codes;
|
|
my $tformat= "=" . join "%x09", map { "%" . $_ } @field_codes;
|
|
|
|
sub _make_name_author_info {
|
|
my ($self, $commit_info, $name_key)= @_;
|
|
my $author_info= $self->{author_info};
|
|
(my $email_key= $name_key) =~ s/name/email/;
|
|
my $email= $commit_info->{$email_key};
|
|
my $name= $commit_info->{$name_key};
|
|
|
|
my $line= $author_info->{"email2line"}{$email}
|
|
// $author_info->{"name2line"}{ lc($name) };
|
|
|
|
$line //= sprintf "%-31s<%s>",
|
|
$commit_info->{$name_key}, $commit_info->{$email_key};
|
|
$commit_info->{ $name_key . "_canon" }= $line;
|
|
return $line;
|
|
}
|
|
|
|
sub _make_name_simple {
|
|
my ($self, $commit_info, $key)= @_;
|
|
my $name_key= $key . "_name";
|
|
my $email_key= $key . "_email";
|
|
return sprintf "%s <%s>", $commit_info->{$name_key},
|
|
lc($commit_info->{$email_key});
|
|
}
|
|
|
|
sub __fold_trim_ws {
|
|
my ($munged)= @_;
|
|
$munged =~ s/\s+/ /g;
|
|
$munged =~ s/\A\s+//;
|
|
$munged =~ s/\s+\z//;
|
|
return $munged;
|
|
}
|
|
|
|
sub _register_author {
|
|
my ($self, $name, $type)= @_;
|
|
|
|
return if $self->_logical_exclude_author($name);
|
|
|
|
my $digest= $self->_keeper_digest($name)
|
|
or return;
|
|
|
|
$self->{who_stats}{$name}{$type}++;
|
|
|
|
$self->{author_info}{"lines"}{$name}
|
|
and return;
|
|
|
|
my $munged= __fold_trim_ws($name);
|
|
if ($self->{exclude_missing}) {
|
|
$self->_exclude_contrib($name, $digest);
|
|
}
|
|
else {
|
|
$self->{author_info}{"lines"}{$name}++;
|
|
|
|
my $munged= __fold_trim_ws($name);
|
|
warn encode_utf8 sprintf
|
|
"New %s '%s' (%s) will be added to AUTHORS file.\n",
|
|
$type, $munged, $digest
|
|
if $self->{verbose};
|
|
}
|
|
}
|
|
|
|
sub git_conf_get {
|
|
my ($self, $setting)= @_;
|
|
chomp(my $value= `git config --get $setting`);
|
|
return decode_utf8 $value;
|
|
}
|
|
|
|
sub current_git_user_name {
|
|
my ($self)= @_;
|
|
return $self->git_conf_get("user.name");
|
|
}
|
|
|
|
sub current_git_user_email {
|
|
my ($self)= @_;
|
|
return $self->git_conf_get("user.email");
|
|
}
|
|
|
|
sub current_git_name_email {
|
|
my ($self, $type)= @_;
|
|
my $name=
|
|
$ENV{"GIT_\U$type\E_NAME"}
|
|
|| $self->git_conf_get("\L$type\E.name")
|
|
|| $self->current_git_user_name();
|
|
my $email=
|
|
$ENV{"GIT_\U$type\E_EMAIL"}
|
|
|| $self->git_conf_get("\L$type\E.email")
|
|
|| $self->current_git_user_email();
|
|
return $name, $email;
|
|
}
|
|
|
|
sub format_name_email {
|
|
my ($self, $name, $email)= @_;
|
|
return sprintf "%s <%s>", $name, $email;
|
|
}
|
|
|
|
sub current_committer_name_email {
|
|
my ($self, $full)= @_;
|
|
my ($n, $e)= $self->current_git_name_email("committer");
|
|
return $full ? $self->format_name_email($n, $e) : ($n, $e);
|
|
}
|
|
|
|
sub current_author_name_email {
|
|
my ($self, $full)= @_;
|
|
my ($n, $e)= $self->current_git_name_email("author");
|
|
return $full ? $self->format_name_email($n, $e) : ($n, $e);
|
|
}
|
|
|
|
sub git_status_porcelain {
|
|
my ($self)= @_;
|
|
my $status= `git status --porcelain`;
|
|
return $status // "";
|
|
}
|
|
|
|
sub finalize_commit_info {
|
|
my ($self, $commit_info)= @_;
|
|
my $author= $commit_info->{author_name_mm_canon};
|
|
my $author_stats= $self->{who_stats}{$author} ||= {};
|
|
|
|
my $file_info= $commit_info->{files} ||= {};
|
|
foreach my $file (keys %{$file_info}) {
|
|
if (!$self->{file_stats}{$file}) {
|
|
$self->{summary_stats}{num_files}++;
|
|
}
|
|
my $fs= $self->{file_stats}{$file} ||= {};
|
|
my $afs= $author_stats->{file_stats}{$file} ||= {};
|
|
my $added= $file_info->{$file}{lines_added};
|
|
my $removed= $file_info->{$file}{lines_removed};
|
|
my $delta= $file_info->{$file}{lines_delta};
|
|
defined $_ and $_ eq "-" and undef $_ for $added, $removed;
|
|
|
|
if (defined $added) {
|
|
for my $h ($author_stats, $fs, $afs) {
|
|
$h->{lines_delta} += $delta;
|
|
$h->{lines_added} += $added;
|
|
$h->{lines_removed} += $removed;
|
|
}
|
|
}
|
|
else {
|
|
$author_stats->{binary_change}++;
|
|
$fs->{binary_change}++;
|
|
$afs->{binary_change}++;
|
|
}
|
|
$afs->{commits}++
|
|
or $author_stats->{num_files}++;
|
|
|
|
$fs->{commits}++
|
|
or $self->{summary_stats}{num_files}++;
|
|
|
|
$fs->{who}{$author}++
|
|
or $self->{summary_stats}{authors}++;
|
|
}
|
|
}
|
|
|
|
sub read_commit_log {
|
|
my ($self)= @_;
|
|
my $author_info= $self->{author_info} ||= {};
|
|
my $mailmap_info= $self->{mailmap_info} ||= {};
|
|
|
|
my $commits_read= 0;
|
|
my @args= ("--pretty='format:$tformat'");
|
|
push @args, "--numstat" if $self->{numstat};
|
|
push @args, "'$self->{commit_range}'" if $self->{commit_range};
|
|
|
|
my $last_commit_info;
|
|
my $cmd= qq(git -c diff.algorithm=myers log @args);
|
|
$cmd =~ s/'/"/g if $^O =~ /Win/;
|
|
|
|
# If we run under -Dmksymlinks git might not have access to the
|
|
# .mailmap unless we chdir into the git working tree. Ideally we
|
|
# would prefer to pass in the location of the .mailmap file to git,
|
|
# but it doesn't support that as a command line option. We can't
|
|
# just chdir into the source_dir as that would break scalar PerlIO
|
|
# layer operations which are loaded late. So we chdir before we
|
|
# read the git log, and then chdir right back after we have opened
|
|
# the handle. Note there is related code in read_mailmap_file(),
|
|
# if you change this also change that.
|
|
|
|
my $cwd = getcwd();
|
|
if ($self->{source_dir}) {
|
|
chdir $self->{source_dir};
|
|
}
|
|
|
|
open my $fh, "-|", $cmd
|
|
or die "Failed to open git log pipe: $!";
|
|
|
|
chdir $cwd;
|
|
|
|
binmode($fh);
|
|
while (defined(my $line= <$fh>)) {
|
|
chomp $line;
|
|
$line= decode_utf8($line);
|
|
if ($line =~ s/^=//) {
|
|
$self->finalize_commit_info($last_commit_info)
|
|
if $last_commit_info;
|
|
}
|
|
elsif ($line =~ /\S/) {
|
|
my ($added, $removed, $file)= split /\s+/, $line;
|
|
if ($added ne "-") {
|
|
$last_commit_info->{files}{$file}= {
|
|
lines_added => $added,
|
|
lines_removed => $removed,
|
|
lines_delta => $added - $removed,
|
|
};
|
|
}
|
|
else {
|
|
$last_commit_info->{files}{$file}{binary_changes}++;
|
|
}
|
|
next;
|
|
}
|
|
else {
|
|
# whitspace only or empty line
|
|
next;
|
|
}
|
|
$commits_read++;
|
|
my $commit_info= {};
|
|
$last_commit_info= $commit_info;
|
|
@{$commit_info}{@field_names}= split /\t/, $line, 0 + @field_names;
|
|
|
|
my $author_name_mm_canon=
|
|
$self->_make_name_author_info($commit_info, "author_name_mm");
|
|
|
|
my $committer_name_mm_canon=
|
|
$self->_make_name_author_info($commit_info, "committer_name_mm");
|
|
|
|
my $author_name_real= $self->_make_name_simple($commit_info, "author");
|
|
|
|
my $committer_name_real=
|
|
$self->_make_name_simple($commit_info, "committer");
|
|
|
|
my ($author_good, $committer_good);
|
|
|
|
if ( $self->_keeper_digest($author_name_mm_canon)
|
|
&& $self->_keeper_digest($author_name_real))
|
|
{
|
|
$self->_check_name_mailmap($author_name_mm_canon, $author_name_real,
|
|
$commit_info, "author name");
|
|
$self->_register_author($author_name_mm_canon, "author");
|
|
$author_good= 1;
|
|
}
|
|
|
|
if ( $self->_keeper_digest($committer_name_mm_canon)
|
|
&& $self->_keeper_digest($committer_name_real))
|
|
{
|
|
$self->_check_name_mailmap($committer_name_mm_canon,
|
|
$committer_name_real, $commit_info, "committer name");
|
|
$self->_register_author($committer_name_mm_canon, "committer");
|
|
$committer_good= 1;
|
|
}
|
|
if ( $author_good
|
|
and $committer_good
|
|
and $committer_name_mm_canon ne $author_name_mm_canon)
|
|
{
|
|
$self->{who_stats}{$committer_name_mm_canon}{applied}++;
|
|
}
|
|
}
|
|
$self->finalize_commit_info($last_commit_info) if $last_commit_info;
|
|
if (!$commits_read) {
|
|
if ($self->{commit_range}) {
|
|
die "No commits in range '$self->{commit_range}'\n";
|
|
}
|
|
else {
|
|
die "Panic! There are no commits!\n";
|
|
}
|
|
}
|
|
return $author_info;
|
|
}
|
|
|
|
sub dupe_info {
|
|
my ($self)= @_;
|
|
my $msg= "";
|
|
foreach my $type (sort keys %{ $self->{dupe} || {} }) {
|
|
$msg .= "Duplicate \u$type in $self->{authors_file}:\n";
|
|
foreach my $key (sort keys %{ $self->{dupe}{$type} }) {
|
|
$msg .= " \u$type '$key'\n";
|
|
foreach my $line (sort keys %{ $self->{dupe}{$type}{$key} }) {
|
|
$msg .= " $line\n";
|
|
}
|
|
}
|
|
}
|
|
return $msg;
|
|
}
|
|
|
|
sub read_authors_file {
|
|
my ($self)= @_;
|
|
my $authors_file= $self->{authors_file};
|
|
|
|
my @authors_preamble;
|
|
open my $in_fh, "<", $authors_file
|
|
or die "Failed to open for read '$authors_file': $!";
|
|
my $raw_text= "";
|
|
my $found_sep= 0;
|
|
while (defined(my $line= <$in_fh>)) {
|
|
$raw_text .= $line;
|
|
$line= decode_utf8($line);
|
|
chomp $line;
|
|
push @authors_preamble, $line;
|
|
if ($line =~ /^--/) {
|
|
$found_sep= 1;
|
|
last;
|
|
}
|
|
}
|
|
if (!$found_sep) {
|
|
die sprintf <<'EOFMT', $authors_file;
|
|
Possibly corrupted authors file '%s'.
|
|
|
|
There should be a big '#' comment block at the start of the file
|
|
followed by "--" followed by a list of names and email/contact
|
|
details. We couldn't find the separator. Where did it go?
|
|
|
|
Cowardly refusing to continue until this is fixed.
|
|
EOFMT
|
|
}
|
|
my %author_info;
|
|
while (defined(my $line= <$in_fh>)) {
|
|
$raw_text .= $line;
|
|
$line= decode_utf8($line);
|
|
chomp $line;
|
|
my ($name, $email);
|
|
my $copy= $line;
|
|
$copy =~ s/\s+\z//;
|
|
if ($copy =~ s/<([^<>]*)>//) {
|
|
$email= $1;
|
|
}
|
|
elsif ($copy =~ s/\s+(\@\w+)\z//) {
|
|
$email= $1;
|
|
}
|
|
$copy =~ s/\s+\z//;
|
|
$name= $copy;
|
|
$email //= "unknown";
|
|
my $orig_name= $name;
|
|
my $orig_email= $email;
|
|
if (my $new_name= $self->{change_name_for_name}{$orig_name}) {
|
|
$name= $new_name;
|
|
}
|
|
if (my $new_name= $self->{change_name_for_email}{$orig_email}) {
|
|
$name= $new_name;
|
|
}
|
|
if (my $new_email= $self->{change_email_for_name}{$orig_name}) {
|
|
$email= $new_email;
|
|
}
|
|
if (my $new_email= $self->{change_email_for_email}{$orig_email}) {
|
|
$email= $new_email;
|
|
}
|
|
$line= sprintf "%-31s%s", $name, $email =~ /^\@/ ? $email : "<$email>";
|
|
$line =~ s/\s+<unknown>\z//;
|
|
$email= lc($email);
|
|
|
|
$line =~ s/\s+\z//;
|
|
$author_info{"lines"}{$line}++;
|
|
if ($email and $email ne "unknown") {
|
|
if (my $other= $author_info{"email2line"}{$email}) {
|
|
$self->{dupe}{email}{$email}{$other}= 1;
|
|
$self->{dupe}{email}{$email}{$line}= 1;
|
|
}
|
|
else {
|
|
$author_info{"email2line"}{$email}= $line;
|
|
}
|
|
}
|
|
if ($name and $name ne "unknown") {
|
|
if (my $other= $author_info{"name2line"}{ lc($name) }) {
|
|
$self->{dupe}{name}{$name}{$other}= 1;
|
|
$self->{dupe}{name}{$name}{$line}= 1;
|
|
}
|
|
else {
|
|
$author_info{"name2line"}{ lc($name) }= $line;
|
|
}
|
|
}
|
|
$author_info{"email2name"}{$email} //= $name
|
|
if $email
|
|
and $name
|
|
and $email ne "unknown";
|
|
$author_info{"name2email"}{$name} //= $email
|
|
if $name and $name ne "unknown";
|
|
$author_info{"clean_full"}{ __fold_trim_ws($line) }= $line;
|
|
}
|
|
close $in_fh
|
|
or die "Failed to close '$authors_file': $!";
|
|
|
|
$self->{author_info}= \%author_info;
|
|
$self->{authors_preamble}= \@authors_preamble;
|
|
$self->{authors_raw_text}= $raw_text;
|
|
return (\%author_info, \@authors_preamble, $raw_text);
|
|
}
|
|
|
|
sub update_authors_file {
|
|
my ($self)= @_;
|
|
|
|
my $author_info= $self->{author_info};
|
|
my $authors_preamble= $self->{authors_preamble};
|
|
my $authors_file= $self->{authors_file};
|
|
my $old_raw_text= $self->{authors_raw_text};
|
|
|
|
my $authors_file_new= $authors_file . ".new";
|
|
my $new_raw_text= "";
|
|
{
|
|
open my $out_fh, ">", \$new_raw_text
|
|
or die "Failed to open scalar buffer for write: $!";
|
|
foreach my $line (@$authors_preamble) {
|
|
print $out_fh encode_utf8($line), "\n"
|
|
or die "Failed to print to scalar buffer handle: $!";
|
|
}
|
|
foreach my $author (__sorted_hash_keys($author_info->{"lines"})) {
|
|
next if $self->_logical_exclude_author($author);
|
|
my $author_mm= $self->_author_to_mailmap($author);
|
|
if (!$self->_keeper_digest($author_mm)) {
|
|
next;
|
|
}
|
|
print $out_fh encode_utf8($author), "\n"
|
|
or die "Failed to print to scalar buffer handle: $!";
|
|
}
|
|
close $out_fh
|
|
or die "Failed to close scalar buffer handle: $!";
|
|
}
|
|
if ($new_raw_text ne $old_raw_text) {
|
|
$self->{changed_count}++;
|
|
$self->_log_file_changes_quick_and_dirty_diff($authors_file,
|
|
$old_raw_text, $new_raw_text);
|
|
|
|
if ($self->{no_update}) {
|
|
return 1;
|
|
}
|
|
|
|
warn "Updating '$authors_file'\n" if $self->{verbose};
|
|
|
|
open my $out_fh, ">", $authors_file_new
|
|
or die "Failed to open for write '$authors_file_new': $!";
|
|
binmode $out_fh;
|
|
print $out_fh $new_raw_text;
|
|
close $out_fh
|
|
or die "Failed to close '$authors_file_new': $!";
|
|
rename $authors_file_new, $authors_file
|
|
or die
|
|
"Failed to rename '$authors_file_new' to '$authors_file': $!";
|
|
return 1;
|
|
}
|
|
else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
sub read_mailmap_file {
|
|
my ($self)= @_;
|
|
my $mailmap_file= $self->{mailmap_file};
|
|
|
|
# If we run under -Dmksymlinks the .mailmap might actually be
|
|
# located in a different directory than the one we are running from.
|
|
# We could munge the $mailmap_file to be relative to source_dir if
|
|
# it was not already an absolute path, but that would require
|
|
# loading File::Spec, and doing THAT wouldn't help us when we read
|
|
# the git log (see the related code for doing that in the
|
|
# read_commit_log() sub), so we use the same strategy of remembering
|
|
# our current working directory, chdir'ing into the source dir,
|
|
# opening the file, and then chdir'ing back in both cases.
|
|
|
|
my $cwd = getcwd();
|
|
if ($self->{source_dir}) {
|
|
chdir $self->{source_dir};
|
|
}
|
|
open my $in, "<", $mailmap_file
|
|
or die "Failed to read '$mailmap_file': $!";
|
|
chdir $cwd;
|
|
my %mailmap_hash;
|
|
my @mailmap_preamble;
|
|
my $line_num= 0;
|
|
my $raw_text= "";
|
|
while (defined(my $line= <$in>)) {
|
|
$raw_text .= $line;
|
|
$line= decode_utf8($line);
|
|
++$line_num;
|
|
next unless $line =~ /\S/;
|
|
chomp($line);
|
|
if ($line =~ /^#/) {
|
|
if (!keys %mailmap_hash) {
|
|
push @mailmap_preamble, $line;
|
|
}
|
|
else {
|
|
die encode_utf8 "Not expecting comments after header ",
|
|
"finished at line $line_num!\nLine: $line\n";
|
|
}
|
|
}
|
|
else {
|
|
$mailmap_hash{$line}= $line_num;
|
|
}
|
|
}
|
|
close $in
|
|
or die "Failed to close '$mailmap_file' after reading: $!";
|
|
if (!@mailmap_preamble) {
|
|
die sprintf <<'EOFMT', $mailmap_file;
|
|
Possibly corrupted mailmap file '%s'.
|
|
|
|
This file should have a preamble of '#' comments in it.
|
|
|
|
Where did they go?
|
|
|
|
Cowardly refusing to continue until this is fixed.
|
|
EOFMT
|
|
}
|
|
$self->{orig_mailmap_hash}= \%mailmap_hash;
|
|
$self->{mailmap_preamble}= \@mailmap_preamble;
|
|
$self->{mailmap_raw_text}= $raw_text;
|
|
return (\%mailmap_hash, \@mailmap_preamble, $raw_text);
|
|
}
|
|
|
|
sub __sorted_hash_keys {
|
|
my ($hash)= @_;
|
|
return __sort_names(keys %$hash);
|
|
}
|
|
|
|
sub __sort_names {
|
|
my @sorted= sort { fc($a) cmp fc($b) || $a cmp $b } @_;
|
|
return @sorted;
|
|
}
|
|
|
|
# Returns 0 if the file needed to be changed, Return 1 if it does not.
|
|
sub update_mailmap_file {
|
|
my ($self)= @_;
|
|
my $mailmap_hash= $self->{new_mailmap_hash};
|
|
my $mailmap_preamble= $self->{mailmap_preamble};
|
|
my $mailmap_file= $self->{mailmap_file};
|
|
my $old_raw_text= $self->{mailmap_raw_text};
|
|
|
|
my $new_raw_text= "";
|
|
{
|
|
my $cwd = getcwd();
|
|
open my $out, ">", \$new_raw_text
|
|
or die "Failed to open scalar buffer for write: $!";
|
|
foreach
|
|
my $line (@$mailmap_preamble, __sorted_hash_keys($mailmap_hash),)
|
|
{
|
|
next if $line =~ m!\A(.*) \1\z!;
|
|
print $out encode_utf8($line), "\n"
|
|
or die "Failed to print to scalar buffer handle: $!";
|
|
}
|
|
close $out
|
|
or die "Failed to close scalar buffer handle: $!";
|
|
}
|
|
if ($new_raw_text ne $old_raw_text) {
|
|
$self->{changed_count}++;
|
|
$self->_log_file_changes_quick_and_dirty_diff($mailmap_file,
|
|
$old_raw_text, $new_raw_text);
|
|
|
|
if ($self->{no_update}) {
|
|
return 1;
|
|
}
|
|
|
|
warn "Updating '$mailmap_file'\n"
|
|
if $self->{verbose};
|
|
|
|
my $mailmap_file_new= $mailmap_file . ".new";
|
|
open my $out, ">", $mailmap_file_new
|
|
or die "Failed to write '$mailmap_file_new': $!";
|
|
binmode $out
|
|
or die "Failed to binmode '$mailmap_file_new': $!";
|
|
print $out $new_raw_text
|
|
or die "Failed to print to '$mailmap_file_new': $!";
|
|
close $out
|
|
or die "Failed to close '$mailmap_file_new' after writing: $!";
|
|
rename $mailmap_file_new, $mailmap_file
|
|
or die
|
|
"Failed to rename '$mailmap_file_new' to '$mailmap_file': $!";
|
|
return 1;
|
|
}
|
|
else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
sub parse_orig_mailmap_hash {
|
|
my ($self)= @_;
|
|
my $mailmap_hash= $self->{orig_mailmap_hash};
|
|
|
|
my @recs;
|
|
foreach my $line (__sorted_hash_keys($mailmap_hash)) {
|
|
my $line_num= $mailmap_hash->{$line};
|
|
$line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)>
|
|
(?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x
|
|
or die encode_utf8
|
|
"Failed to parse '$self->{mailmap_file}' line num $line_num: '$line'\n";
|
|
if (!$1 or !$2) {
|
|
die encode_utf8 "Both preferred name and email are mandatory ",
|
|
"in line num $line_num: '$line'";
|
|
}
|
|
my ($name, $email, $other_name, $other_email)= ($1, $2, $3, $4);
|
|
my ($orig_name, $orig_email)= ($1, $2);
|
|
if (my $new_name= $self->{change_name_for_name}{$orig_name}) {
|
|
$name= $new_name;
|
|
}
|
|
if (my $new_name= $self->{change_name_for_email}{$orig_email}) {
|
|
$name= $new_name;
|
|
}
|
|
if (my $new_email= $self->{change_email_for_name}{$orig_name}) {
|
|
$email= $new_email;
|
|
}
|
|
if (my $new_email= $self->{change_email_for_email}{$orig_email}) {
|
|
$email= $new_email;
|
|
}
|
|
|
|
push @recs, [ $name, $email, $other_name, $other_email, $line_num ];
|
|
}
|
|
return \@recs;
|
|
}
|
|
|
|
sub _safe_set_key {
|
|
my ($self, $hash, $root_key, $key, $val, $pretty_name)= @_;
|
|
$hash->{$root_key}{$key} //= $val;
|
|
my $prev= $hash->{$root_key}{$key};
|
|
if ($prev ne $val) {
|
|
die encode_utf8 "Collision on mapping $root_key: "
|
|
. " '$key' maps to '$prev' and '$val'\n";
|
|
}
|
|
}
|
|
|
|
my $O2P= "other2preferred";
|
|
my $O2PN= "other2preferred_name";
|
|
my $O2PE= "other2preferred_email";
|
|
my $P2O= "preferred2other";
|
|
my $N2P= "name2preferred";
|
|
my $E2P= "email2preferred";
|
|
|
|
my $blurb= ""; # FIXME - replace with a nice message
|
|
|
|
sub known_contributor {
|
|
my ($self, $name, $email)= @_;
|
|
if (!$name or !$email) { return 0 }
|
|
my $combined= "$name <$email>";
|
|
return ((
|
|
$self->{mailmap_info}{$O2P}{$combined}
|
|
&& $self->_keeper_digest($combined)
|
|
) ? 1 : 0
|
|
);
|
|
}
|
|
|
|
sub _check_name_mailmap {
|
|
my ($self, $auth_name, $raw_name, $commit_info, $descr)= @_;
|
|
my $mailmap_info= $self->{mailmap_info};
|
|
|
|
my $name= $self->_author_to_mailmap($auth_name);
|
|
|
|
my $digest= $self->_keeper_digest($name)
|
|
or return 1; # known but ignore
|
|
|
|
my $name_info= $mailmap_info->{$P2O}{$name};
|
|
|
|
if (!$name_info || !$name_info->{$raw_name}) {
|
|
if ($self->{exclude_missing}) {
|
|
$self->_exclude_contrib($name, $digest);
|
|
}
|
|
else {
|
|
$mailmap_info->{add}{"$name $raw_name"}++;
|
|
|
|
warn encode_utf8 sprintf
|
|
"Unknown %s '%s' in commit %s '%s'\n%s",
|
|
$descr,
|
|
$name,
|
|
$commit_info->{"abbrev_hash"},
|
|
$commit_info->{"commit_subject"}, $blurb
|
|
if $self->{verbose};
|
|
}
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub _author_to_mailmap {
|
|
my ($self, $name)= @_;
|
|
$name =~ s/<([^<>]+)>/<\L$1\E>/
|
|
or $name =~ s/(\s)(\@\w+)\z/$1<\L$2\E>/
|
|
or $name .= " <unknown>";
|
|
|
|
$name= __fold_trim_ws($name);
|
|
return $name;
|
|
}
|
|
|
|
sub check_fix_mailmap_hash {
|
|
my ($self)= @_;
|
|
my $orig_mailmap_hash= $self->{orig_mailmap_hash};
|
|
my $author_info= $self->{author_info};
|
|
foreach my $key (keys %{ $author_info->{clean_full} }) {
|
|
$key .= " <unknown>"
|
|
unless $key =~ /\s+(?:<[^>]+>|\@\w+)\z/;
|
|
$key =~ s/\s+(\@\w+)\z/ <$1>/;
|
|
$orig_mailmap_hash->{"$key $key"} //= -1;
|
|
}
|
|
my $parsed= $self->parse_orig_mailmap_hash();
|
|
my @fixed;
|
|
my %seen_map;
|
|
my %pref_groups;
|
|
|
|
my $remove_no_names_with_overlaps= 0;
|
|
|
|
# first pass through the data, do any conversions, eg, LC
|
|
# the email address, decode any MIME-Header style email addresses.
|
|
# We also correct any preferred name entries so they match what
|
|
# we already have in AUTHORS, and check that there aren't collisions
|
|
# or other issues in the data.
|
|
foreach my $rec (@$parsed) {
|
|
my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
|
|
$pemail= lc($pemail);
|
|
$oemail= lc($oemail) if defined $oemail;
|
|
if ($pname =~ /=\?UTF-8\?/) {
|
|
$pname= decode("MIME-Header", $pname);
|
|
}
|
|
my $auth_email= $author_info->{"name2email"}{$pname};
|
|
if ($auth_email) {
|
|
## this name exists in authors, so use its email data for pemail
|
|
$pemail= $auth_email;
|
|
}
|
|
my $auth_name= $author_info->{"email2name"}{$pemail};
|
|
if ($auth_name) {
|
|
## this email exists in authors, so use its name data for pname
|
|
$pname= $auth_name;
|
|
}
|
|
|
|
# neither name nor email exist in authors.
|
|
if ($pname ne "unknown") {
|
|
if (my $email= $seen_map{"name"}{$pname}) {
|
|
## we have seen this pname before, check the pemail
|
|
## is consistent
|
|
if ($email ne $pemail) {
|
|
warn encode_utf8 "Inconsistent emails for name '$pname'"
|
|
. " at line num $line_num: keeping '$email',"
|
|
. " ignoring '$pemail'\n";
|
|
$pemail= $email;
|
|
}
|
|
}
|
|
else {
|
|
$seen_map{"name"}{$pname}= $pemail;
|
|
}
|
|
}
|
|
if ($pemail ne "unknown") {
|
|
if (my $name= $seen_map{"email"}{$pemail}) {
|
|
## we have seen this preferred_email before, check the preferred_name
|
|
## is consistent
|
|
if ($name ne $pname) {
|
|
warn encode_utf8 "Inconsistent name for email '$pemail'"
|
|
. " at line num $line_num: keeping '$name', ignoring"
|
|
. " '$pname'\n";
|
|
$pname= $name;
|
|
}
|
|
}
|
|
else {
|
|
$seen_map{"email"}{$pemail}= $pname;
|
|
}
|
|
}
|
|
|
|
my $rec= [ $pname, $pemail, $oname, $oemail, $line_num ];
|
|
if ($remove_no_names_with_overlaps) {
|
|
|
|
# Build an index of "preferred name/email" to other-email, other name
|
|
# we use this later to remove redundant entries missing a name.
|
|
$pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }= $rec;
|
|
}
|
|
else {
|
|
push @fixed, $rec;
|
|
}
|
|
}
|
|
|
|
if ($remove_no_names_with_overlaps) {
|
|
|
|
# this removes entries like
|
|
# Joe <blogs> <whatever>
|
|
# where there is a corresponding
|
|
# Joe <blogs> Joe X <whatever>
|
|
foreach my $pref (__sorted_hash_keys(\%pref_groups)) {
|
|
my $entries= $pref_groups{$pref};
|
|
foreach my $email (__sorted_hash_keys($entries)) {
|
|
my @names= __sorted_hash_keys($entries->{$email});
|
|
if (0 and $names[0] eq "" and @names > 1) {
|
|
shift @names;
|
|
}
|
|
foreach my $name (@names) {
|
|
push @fixed, $entries->{$email}{$name};
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
# final pass through the dataset, build up a database
|
|
# we will use later for checks and updates, and reconstruct
|
|
# the canonical entries.
|
|
my $new_mailmap_hash= {};
|
|
my $mailmap_info= {};
|
|
foreach my $rec (@fixed) {
|
|
my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
|
|
my $preferred= "$pname <$pemail>";
|
|
my $other;
|
|
if (defined $oemail) {
|
|
$other= $oname ? "$oname <$oemail>" : "<$oemail>";
|
|
}
|
|
if (!$self->_keeper_digest($preferred)) {
|
|
$self->_exclude_contrib($other);
|
|
next;
|
|
}
|
|
elsif (!$self->_keeper_digest($other)) {
|
|
next;
|
|
}
|
|
if ($other and $other ne "<unknown>") {
|
|
$self->_safe_set_key($mailmap_info, $O2P, $other, $preferred);
|
|
$self->_safe_set_key($mailmap_info, $O2PN, $other, $pname);
|
|
$self->_safe_set_key($mailmap_info, $O2PE, $other, $pemail);
|
|
}
|
|
$mailmap_info->{$P2O}{$preferred}{$other}++;
|
|
if ($pname ne "unknown") {
|
|
$self->_safe_set_key($mailmap_info, $N2P, $pname, $preferred);
|
|
}
|
|
if ($pemail ne "unknown") {
|
|
$self->_safe_set_key($mailmap_info, $E2P, $pemail, $preferred);
|
|
}
|
|
my $line= $preferred;
|
|
$line .= " $other" if $other;
|
|
$new_mailmap_hash->{$line}= $line_num;
|
|
}
|
|
$self->{new_mailmap_hash}= $new_mailmap_hash;
|
|
$self->{mailmap_info}= $mailmap_info;
|
|
return ($new_mailmap_hash, $mailmap_info);
|
|
}
|
|
|
|
sub add_new_mailmap_entries {
|
|
my ($self)= @_;
|
|
my $mailmap_hash= $self->{new_mailmap_hash};
|
|
my $mailmap_info= $self->{mailmap_info};
|
|
my $mailmap_file= $self->{mailmap_file};
|
|
|
|
my $mailmap_add= $mailmap_info->{add}
|
|
or return 0;
|
|
|
|
my $num= 0;
|
|
for my $new (__sorted_hash_keys($mailmap_add)) {
|
|
!$mailmap_hash->{$new}++ or next;
|
|
warn encode_utf8 "Updating '$mailmap_file' with: $new\n"
|
|
if $self->{verbose};
|
|
$num++;
|
|
}
|
|
return $num;
|
|
}
|
|
|
|
sub read_and_update {
|
|
my ($self)= @_;
|
|
my ($authors_file, $mailmap_file)=
|
|
%{$self}{qw(authors_file mailmap_file)};
|
|
|
|
# read the authors file and extract the info it contains
|
|
$self->read_authors_file();
|
|
|
|
# read the mailmap file.
|
|
$self->read_mailmap_file();
|
|
|
|
# check and possibly fix the mailmap data, and build a set of precomputed
|
|
# datasets to work with it.
|
|
$self->check_fix_mailmap_hash();
|
|
|
|
# update the mailmap based on any check or fixes we just did.
|
|
$self->update_mailmap_file();
|
|
|
|
# read the commits names using git log, and compares and checks
|
|
# them against the data we have in authors.
|
|
$self->read_commit_log();
|
|
|
|
# update the authors file with any changes
|
|
$self->update_authors_file();
|
|
|
|
# check if we discovered new email data from the commits that
|
|
# we need to write back to disk.
|
|
$self->add_new_mailmap_entries()
|
|
and $self->update_mailmap_file();
|
|
|
|
$self->update_exclude_file();
|
|
|
|
return $self->changed_count();
|
|
}
|
|
|
|
sub read_exclude_file {
|
|
my ($self)= @_;
|
|
my $exclude_file= $self->{exclude_file};
|
|
my $exclude_digest= $self->{exclude_digest} ||= {};
|
|
|
|
open my $in_fh, "<", $exclude_file
|
|
or do {
|
|
warn "Failed to open '$exclude_file': $!";
|
|
return;
|
|
};
|
|
my $head= "";
|
|
my $orig= "";
|
|
my $seen_data= 0;
|
|
while (defined(my $line= <$in_fh>)) {
|
|
$orig .= $line;
|
|
if ($line =~ /^\s*#/ || $line !~ /\S/) {
|
|
$head .= $line unless $seen_data;
|
|
next;
|
|
}
|
|
else {
|
|
$seen_data= 1;
|
|
}
|
|
chomp($line);
|
|
$line =~ s/\A\s+//;
|
|
$line =~ s/\s*(?:#.*)?\z//;
|
|
$exclude_digest->{$line}++ if length($line);
|
|
}
|
|
close $in_fh
|
|
or die "Failed to close '$exclude_file' after reading: $!";
|
|
if (!$head) {
|
|
die sprintf <<'EOFMT', $exclude_file;
|
|
Possibly corrupted exclude file '%s'.
|
|
|
|
This file should have a header of '#' comments in it.
|
|
|
|
Where did they go?
|
|
|
|
Cowardly refusing to continue until this is fixed.
|
|
EOFMT
|
|
}
|
|
$self->{exclude_file_text_head}= $head;
|
|
$self->{exclude_file_text_orig}= $orig;
|
|
|
|
return $exclude_digest;
|
|
}
|
|
|
|
sub update_exclude_file {
|
|
my ($self)= @_;
|
|
my $exclude_file= $self->{exclude_file};
|
|
my $exclude_text= $self->{exclude_file_text_head};
|
|
foreach my $digest (__sorted_hash_keys($self->{exclude_digest})) {
|
|
$exclude_text .= "$digest\n";
|
|
}
|
|
if ($exclude_text ne $self->{exclude_file_text_orig}) {
|
|
$self->{changed_count}++;
|
|
$self->_log_file_changes_quick_and_dirty_diff($exclude_file,
|
|
$self->{exclude_file_text_orig},
|
|
$exclude_text);
|
|
|
|
if ($self->{no_update}) {
|
|
return 1;
|
|
}
|
|
|
|
warn "Updating '$exclude_file'\n" if $self->{verbose};
|
|
|
|
my $tmp_file= "$exclude_file.new";
|
|
open my $out_fh, ">", $tmp_file
|
|
or die "Cant open '$tmp_file' for write $!";
|
|
print $out_fh $exclude_text
|
|
or die "Failed to print to '$tmp_file': $!";
|
|
close $out_fh
|
|
or die "Failed to close '$tmp_file' after writing: $!";
|
|
rename $tmp_file, $exclude_file
|
|
or die "Failed to rename '$tmp_file' to '$exclude_file': $!";
|
|
|
|
return 1;
|
|
}
|
|
else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
sub changed_count {
|
|
my ($self)= @_;
|
|
return $self->{changed_count};
|
|
}
|
|
|
|
sub changed_file {
|
|
my ($self, $name)= @_;
|
|
return $self->{changed_file}{$name};
|
|
}
|
|
|
|
sub unchanged_file {
|
|
my ($self, $name)= @_;
|
|
return $self->changed_file($name) ? 0 : 1;
|
|
}
|
|
|
|
sub new {
|
|
my ($class, %self)= @_;
|
|
$self{changed_count}= 0;
|
|
for my $name (qw(authors_file mailmap_file exclude_file)) {
|
|
$self{$name}
|
|
or die "Property '$name' is mandatory in constructor";
|
|
}
|
|
|
|
my $self= bless \%self, $class;
|
|
|
|
if (my $ary= $self->{exclude_contrib}) {
|
|
$self->_exclude_contrib($_) for @$ary;
|
|
}
|
|
|
|
$self->read_exclude_file();
|
|
|
|
die Dumper(\%self) if $self{dump_opts};
|
|
|
|
return $self;
|
|
}
|
|
|
|
sub __digest {
|
|
my $thing= $_[0];
|
|
utf8::encode($thing);
|
|
return sha256_base64($thing);
|
|
}
|
|
|
|
# if this name is a "keeper" then return its digest
|
|
# (if we know the digest and it is marked for exclusion
|
|
# then we return 0)
|
|
sub _keeper_digest {
|
|
my ($self, $real_name)= @_;
|
|
my $digest;
|
|
$digest= $self->{digest_cache}{$real_name};
|
|
|
|
if (!$digest) {
|
|
my $name= __fold_trim_ws($real_name);
|
|
|
|
$digest= ($self->{digest_cache}{$name} //= __digest($name));
|
|
$self->{digest_cache}{$real_name}= $digest;
|
|
}
|
|
|
|
return $self->{exclude_digest}{$digest} ? 0 : $digest;
|
|
}
|
|
|
|
# should we exclude this author from the AUTHORS file
|
|
# simply because of the form of their details?
|
|
sub _logical_exclude_author {
|
|
my ($self, $author)= @_;
|
|
|
|
# don't know the persona
|
|
return 1 if $author =~ /^unknown/;
|
|
|
|
# Someone at <unknown> with a single word name.
|
|
# Eg, we wont list "Bob <unknown>"
|
|
if ($author =~ s/\s*<unknown>\z//) {
|
|
return 1 if $author =~ /^\w+$/;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
# exclude this contributor by name, if digest isnt provided
|
|
# then it is computed using _digest.
|
|
sub _exclude_contrib {
|
|
my ($self, $name, $digest)= @_;
|
|
|
|
# if we would exclude them anyway due to the logical
|
|
# naming rules then we do not need to add them to the exclude
|
|
# file.
|
|
return if $self->_logical_exclude_author($name);
|
|
$name= __fold_trim_ws($name);
|
|
$digest //= __digest($name);
|
|
$self->{exclude_digest}{$digest}++
|
|
or warn "Excluding '$name' with '$digest'\n";
|
|
}
|
|
|
|
sub _log_file_changes_quick_and_dirty_diff {
|
|
my ($self, $file, $old_raw_text, $new_raw_text)= @_;
|
|
|
|
my %old;
|
|
$old{$_}++ for split /\n/, $old_raw_text;
|
|
my %new;
|
|
$new{$_}++ for split /\n/, $new_raw_text;
|
|
foreach my $key (keys %new) {
|
|
delete $new{$key} if delete $old{$key};
|
|
}
|
|
$self->{changed_file}{$file}{add}= \%new if keys %new;
|
|
$self->{changed_file}{$file}{del}= \%old if keys %old;
|
|
return $self->{changed_file}{$file};
|
|
}
|
|
|
|
sub _diff_diag {
|
|
my ($self, $want_file)= @_;
|
|
my $diag_str= "";
|
|
foreach my $file (sort keys %{ $self->{changed_file} || {} }) {
|
|
next if $want_file and $file ne $want_file;
|
|
$diag_str .= " File '$file' changes:\n";
|
|
foreach my $action (sort keys %{ $self->{changed_file}{$file} }) {
|
|
foreach
|
|
my $line (sort keys %{ $self->{changed_file}{$file}{$action} })
|
|
{
|
|
$diag_str .= " would $action: $line\n";
|
|
}
|
|
}
|
|
}
|
|
return $diag_str;
|
|
}
|
|
|
|
my %pretty_name= (
|
|
"author" => "Authored",
|
|
"committer" => "Committed",
|
|
"applied" => "Applied",
|
|
"name" => "Name",
|
|
"pos" => "Pos",
|
|
"num_files" => "NFiles",
|
|
"lines_added" => "L++",
|
|
"lines_removed" => "L--",
|
|
"lines_delta" => "L+-",
|
|
"binary_changed" => "Bin+-",
|
|
);
|
|
|
|
sub report_stats {
|
|
my ($self, $stats_key, @types)= @_;
|
|
my @extra= "name";
|
|
my @rows;
|
|
my @total;
|
|
foreach my $name (__sorted_hash_keys($self->{$stats_key})) {
|
|
my @data= map { $self->{$stats_key}{$name}{$_} // 0 } @types;
|
|
$total[$_] += $data[$_] for 0 .. $#data;
|
|
push @data, $name;
|
|
push @rows, \@data if $data[0];
|
|
}
|
|
@rows= sort {
|
|
my $cmp= 0;
|
|
for (0 .. $#$a - 1) {
|
|
$cmp= $b->[$_] <=> $a->[$_];
|
|
last if $cmp;
|
|
}
|
|
$cmp ||= $Collate->cmp($a->[-1], $b->[-1]);
|
|
$cmp
|
|
} @rows;
|
|
@rows= reverse @rows if $self->{in_reverse};
|
|
|
|
if ($self->{as_cumulative}) {
|
|
my $sum= [];
|
|
for my $row (@rows) {
|
|
do {
|
|
$sum->[$_] += $row->[$_];
|
|
$row->[$_]= $sum->[$_];
|
|
}
|
|
for 0 .. $#types;
|
|
}
|
|
}
|
|
|
|
if ($self->{as_percentage}) {
|
|
for my $row (@rows) {
|
|
$row->[$_]= sprintf "%.2f", ($row->[$_] / $total[$_]) * 100
|
|
for 0 .. $#types;
|
|
}
|
|
}
|
|
|
|
foreach my $row (@rows) {
|
|
my $name= $row->[-1];
|
|
$name =~ s/\s+<.*\z//;
|
|
$name =~ s/\s+\@.*\z//;
|
|
$row->[-1]= $name;
|
|
}
|
|
my @col_names= map { $pretty_name{$_} // $_ } @types;
|
|
if ($self->{as_percentage}) {
|
|
$_= "%$_" for @col_names;
|
|
}
|
|
push @col_names, map { $pretty_name{$_} // $_ } @extra;
|
|
|
|
if ($self->{as_list} && @types == 1) {
|
|
$self->_report_list(\@rows, \@types, \@extra, \@col_names);
|
|
}
|
|
else {
|
|
$self->_report_table(\@rows, \@types, \@extra, \@col_names);
|
|
}
|
|
}
|
|
|
|
sub _report_table {
|
|
my ($self, $rows, $types, $extra, $col_names)= @_;
|
|
my $pos= 1;
|
|
unshift @$_, $pos++ for @$rows;
|
|
unshift @$col_names, "Pos";
|
|
my @width= (0) x @$col_names;
|
|
foreach my $row ($col_names, @$rows) {
|
|
for my $idx (0 .. $#$row) {
|
|
$width[$idx] < length($row->[$idx])
|
|
and $width[$idx]= length($row->[$idx]);
|
|
}
|
|
}
|
|
$width[-1]= 40 if $width[-1] > 40;
|
|
$width[$_]= -$width[$_] for 0, -1;
|
|
my $fmt= "#" . join(" | ", ("%*s") x @$col_names) . "\n";
|
|
my $bar_fmt= "#" . join("-+-", ("%*s") x @$col_names) . "\n";
|
|
printf $fmt, map { $width[$_], $col_names->[$_] } 0 .. $#width;
|
|
printf $bar_fmt, map { $width[$_], "-" x abs($width[$_]) } 0 .. $#width;
|
|
for my $idx (0 .. $#$rows) {
|
|
my $row= $rows->[$idx];
|
|
print encode_utf8 sprintf $fmt,
|
|
map { $width[$_], $row->[$_] } 0 .. $#width;
|
|
}
|
|
}
|
|
|
|
sub _report_list {
|
|
my ($self, $rows, $types, $extra, $col_names)= @_;
|
|
my %hash;
|
|
foreach my $row (@$rows) {
|
|
$hash{ $row->[0] }{ $row->[-1] }++;
|
|
}
|
|
my @vals= sort { $b <=> $a } keys %hash; # numeric sort
|
|
my $width= length($col_names->[0]);
|
|
$width < length($_) and $width= length($_) for @vals;
|
|
@vals= reverse @vals if $self->{in_reverse};
|
|
|
|
my $hdr_str= sprintf "%*s | %s", $width, $col_names->[0], $col_names->[-1];
|
|
my $sep_str= sprintf "%*s-+-%s", $width, "-" x $width, "-" x 40;
|
|
my $fmt= "%*s | %s";
|
|
|
|
if ($self->{with_rank_numbers}) {
|
|
$hdr_str= sprintf "#%*s | %s", -length(0 + @$rows), "Pos", $hdr_str;
|
|
$sep_str= sprintf "#%*s-+-%s", -length(0 + @$rows),
|
|
"-" x length(0 + @$rows), $hdr_str;
|
|
}
|
|
print $hdr_str, "\n";
|
|
print $sep_str, "\n";
|
|
my $pos= 1;
|
|
foreach my $val (@vals) {
|
|
my $val_f= sprintf "%*s | ", $width, $val;
|
|
$val_f= sprintf "#%*d | %s", -length(0 + @$rows), $pos++, $val_f
|
|
if $self->{with_rank_numbers};
|
|
print encode_utf8 wrap $val_f,
|
|
" " x length($val_f),
|
|
join(", ", $Collate->sort(keys %{ $hash{$val} })) . "\n";
|
|
}
|
|
}
|
|
|
|
sub _filter_sort_who {
|
|
my ($self, $hash)= @_;
|
|
my @who;
|
|
foreach my $name ($Collate->sort(keys %$hash)) {
|
|
$name =~ s/\s+<.*\z//;
|
|
$name =~ s/\s+\@.*\z//;
|
|
push @who, $name if length $name and lc($name) ne "unknown";
|
|
}
|
|
return @who;
|
|
}
|
|
|
|
sub print_who {
|
|
my ($self)= @_;
|
|
my @who= $self->_filter_sort_who($self->{who_stats});
|
|
print encode_utf8 wrap "", "", join(", ", @who) . ".\n";
|
|
}
|
|
|
|
1;
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
Porting::updateAUTHORS - Library to automatically update AUTHORS and .mailmap based on commit data.
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Porting::updateAUTHORS;
|
|
|
|
my $updater= Porting::updateAUTHORS->new(
|
|
authors_file => "AUTHORS",
|
|
mailmap_file => ".mailmap",
|
|
exclude_file => "Porting/exclude_contrib.txt",
|
|
);
|
|
$updater->read_and_update();
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This the brain of the F<Porting/updateAUTHORS.pl> script. It is expected
|
|
to be used B<from> that script and B<by> that script. Most features and
|
|
options are documented in the F<Porting/updateAUTHORS.pl> and are not
|
|
explicitly documented here, read the F<Porting/updateAUTHORS.pl> manpage
|
|
for more details.
|
|
|
|
=head1 METHODS
|
|
|
|
Porting::updateAUTHORS uses OO as way of managing its internal state.
|
|
This documents the public methods it exposes.
|
|
|
|
=over 4
|
|
|
|
=item add_new_mailmap_entries()
|
|
|
|
If any additions were identified while reading the commits this will
|
|
inject them into the mailmap_hash so they can be written out. Returns a
|
|
count of additions found.
|
|
|
|
=item check_fix_mailmap_hash()
|
|
|
|
Analyzes the data contained the in the .mailmap file and applies any
|
|
automated fixes which are required and which it can automatically
|
|
perform. Returns a hash of adjusted entries and a hash with additional
|
|
metadata about the mailmap entries.
|
|
|
|
=item new(%opts)
|
|
|
|
Create a new object. Required parameters are
|
|
|
|
authors_file
|
|
mailmap_file
|
|
exclude_file
|
|
|
|
Other supported parameters are as follows:
|
|
|
|
verbose
|
|
commit_range
|
|
|
|
this list is not exhaustive. See the code implementing the main()
|
|
function in F<Porting/updateAUTHORS.pl> for an exhaustive list.
|
|
|
|
=item parse_orig_mailmap_hash()
|
|
|
|
Takes a mailmap_hash and parses it and returns it as an array of array
|
|
records with the contents:
|
|
|
|
[ $preferred_name, $preferred_email,
|
|
$other_name, $other_email,
|
|
$line_num ]
|
|
|
|
=item read_and_update()
|
|
|
|
Wraps the other functions in this library and implements the logic and
|
|
intent of this tool. Takes two arguments, the authors file name, and the
|
|
mailmap file name. Returns nothing but may modify the AUTHORS file
|
|
or the .mailmap file. Requires that both files are editable.
|
|
|
|
=item read_commit_log()
|
|
|
|
Read the commit log specified by the property "commit_range" and find
|
|
any new names it contains.
|
|
|
|
Normally used via C<read_and_update> and not called directly.
|
|
|
|
=item read_authors_file()
|
|
|
|
Read the AUTHORS file into the object, and return data about it.
|
|
|
|
Normally used via C<read_and_update> and not called directly.
|
|
|
|
=item read_mailmap_file()
|
|
|
|
Read the .mailmap file into the object and return data about it.
|
|
|
|
Normally used via C<read_and_update> and not called directly.
|
|
|
|
=item read_exclusion_file()
|
|
|
|
Read the exclusion file into the object and return data about it.
|
|
|
|
Normally used via C<read_and_update> and not called directly.
|
|
|
|
=item update_authors_file()
|
|
|
|
Write out an updated AUTHORS file atomically if it has changed,
|
|
returns 0 if the file was actually updated, 1 if it was not.
|
|
|
|
Normally used via C<read_and_update> and not called directly.
|
|
|
|
=item update_mailmap_file()
|
|
|
|
Write out an updated .mailmap file atomically if it has changed,
|
|
returns 0 if the file was actually updated, 1 if it was not.
|
|
|
|
Normally used via C<read_and_update> and not called directly.
|
|
|
|
=item update_exclusion_file()
|
|
|
|
Write out an updated exclusion file atomically if it has changed,
|
|
returns 0 if the file was actually update, 1 if it was not.
|
|
|
|
Normally used via C<read_and_update> and not called directly.
|
|
|
|
=back
|
|
|
|
=head1 TODO
|
|
|
|
More documentation and testing.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
F<Porting/checkAUTHORS.pl>
|
|
|
|
=head1 AUTHOR
|
|
|
|
Yves Orton <demerphq@gmail.com>
|
|
|
|
=cut
|