Skip to content

Commit d1b4d6a

Browse files
authored
[infra] Add stats-report generating scripts (#8304)
1 parent b20f3bd commit d1b4d6a

File tree

4 files changed

+814
-0
lines changed

4 files changed

+814
-0
lines changed

scripts/stats/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Statistics scripts
2+
3+
Perl scripts for generating git statistics reports.
4+
5+
- `l10n.pl` - Localization statistics (commits, coverage, contributors per
6+
locale)
7+
- `repo.pl` - Repository-wide statistics (commits, activity by area,
8+
contributors)
9+
- `StatsCommon.pm` - Shared module used by both scripts
10+
11+
Run with `--help` for usage. By default, saves reports to `tmp/` with date-based
12+
filenames.

scripts/stats/StatsCommon.pm

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
package StatsCommon;
2+
3+
use strict;
4+
use warnings;
5+
use POSIX qw(strftime);
6+
7+
require Exporter;
8+
our @ISA = qw(Exporter);
9+
our @EXPORT = qw(
10+
get_author_consolidation
11+
get_consolidated_contributors
12+
get_line_stats
13+
count_commits
14+
count_unique_contributors
15+
get_median
16+
get_average
17+
validate_date
18+
get_default_dates
19+
setup_output
20+
finish_output
21+
);
22+
23+
# Build an email -> display-name map for the authors of commits in a date range.
#
# Arguments:
#   $since, $until - date bounds, interpolated into git's --since/--until
#   $path_spec     - optional; appended after "--" and handed to the shell
#                    unquoted, exactly as the caller supplied it
#
# Returns a hash reference keyed by author email (%aE). Each value is the
# author name (%aN) from that email's most recent commit in the range.
sub get_author_consolidation {
    my ($since, $until, $path_spec) = @_;

    my %email_to_name;
    my %email_to_time;

    # The name is the LAST field so that a "|" inside a name cannot shift the
    # other fields, and the date is the epoch author time (%at) so that "most
    # recent" is a numeric comparison independent of each commit's UTC offset.
    my $cmd = "git log --since='$since' --until='$until' --format='%aE|%at|%aN'";
    $cmd .= " -- $path_spec" if $path_spec;

    my @commits = `$cmd`;
    chomp @commits;

    foreach my $line (@commits) {
        # Limit of 3 keeps any further "|" characters inside the name.
        my ($email, $time, $name) = split /\|/, $line, 3;
        next unless $email;

        # Treat a missing or malformed timestamp as the oldest possible one.
        $time = 0 unless defined $time && $time =~ /\A-?[0-9]+\z/;

        # Keep the name from the most recent commit for each email; on a tie
        # the first line seen wins.
        if (!exists $email_to_time{$email} || $time > $email_to_time{$email}) {
            $email_to_name{$email} = $name;
            $email_to_time{$email} = $time;
        }
    }

    return \%email_to_name;
}
50+
51+
# List contributors for a date range, most active first.
#
# Arguments:
#   $since, $until - date bounds, interpolated into git's --since/--until
#   $path_spec     - optional; appended after "--" and handed to the shell
#                    unquoted, exactly as the caller supplied it
#   $limit         - optional; when greater than zero, keep only the first
#                    $limit contributors
#
# Returns a list of hash references { name => ..., count => ... }, ordered by
# commit count descending. Commits are grouped by author email; the name comes
# from get_author_consolidation() and falls back to the email itself when no
# name is known.
sub get_consolidated_contributors {
    my ($since, $until, $path_spec, $limit) = @_;

    my $email_to_name = get_author_consolidation($since, $until, $path_spec);
    my %email_to_count;

    # Count commits per email
    my $cmd = "git log --since='$since' --until='$until' --format='%aE'";
    $cmd .= " -- $path_spec" if $path_spec;

    my @emails = `$cmd`;
    chomp @emails;

    foreach my $email (@emails) {
        next unless $email;
        $email_to_count{$email}++;
    }

    # Sort by count descending. Equal counts are ordered by email so that the
    # result (and what $limit cuts off) does not depend on hash iteration
    # order, which differs from run to run.
    my @sorted = sort {
        $email_to_count{$b} <=> $email_to_count{$a}
            or $a cmp $b
    } keys %email_to_count;

    if ($limit && $limit > 0 && $limit < @sorted) {
        splice(@sorted, $limit);
    }

    my @results;
    foreach my $email (@sorted) {
        my $name = $email_to_name->{$email} || $email;
        push @results, { name => $name, count => $email_to_count{$email} };
    }

    return @results;
}
85+
86+
# Sum the per-file line counts that "git log --numstat" reports for a range.
#
# Arguments:
#   $since, $until - date bounds, interpolated into git's --since/--until
#   $path_spec     - optional; appended after "--" when true
#
# Returns the list (added, removed, added - removed). Output rows that do not
# start with two integer columns are ignored.
sub get_line_stats {
    my ($since, $until, $path_spec) = @_;

    my $git_log = "git log --since='$since' --until='$until' --numstat --format=''";
    if ($path_spec) {
        $git_log .= " -- $path_spec";
    }

    my $lines_added   = 0;
    my $lines_removed = 0;

    for my $row (`$git_log`) {
        next unless $row =~ /^(\d+)\s+(\d+)/;
        $lines_added   += $1;
        $lines_removed += $2;
    }

    my $net_change = $lines_added - $lines_removed;
    return ($lines_added, $lines_removed, $net_change);
}
105+
106+
# Count the commits in a date range.
#
# Arguments:
#   $since, $until - date bounds, interpolated into git's --since/--until
#   $path_spec     - optional; appended after "--" when true
#
# Returns the number of lines printed by "git log --oneline".
sub count_commits {
    my ($since, $until, $path_spec) = @_;

    my $git_log = "git log --since='$since' --until='$until' --oneline";
    if ($path_spec) {
        $git_log .= " -- $path_spec";
    }

    my @summary_lines = `$git_log`;
    my $total = @summary_lines;

    return $total;
}
116+
117+
# Count the distinct author emails among the commits in a date range.
#
# Arguments:
#   $since, $until - date bounds, interpolated into git's --since/--until
#   $path_spec     - optional; appended after "--" when true
#
# Returns the number of different non-empty %aE values that git printed.
sub count_unique_contributors {
    my ($since, $until, $path_spec) = @_;

    my $git_log = "git log --since='$since' --until='$until' --format='%aE'";
    if ($path_spec) {
        $git_log .= " -- $path_spec";
    }

    chomp(my @author_emails = `$git_log`);

    # Using the emails as hash keys collapses duplicates.
    my %seen;
    $seen{$_} = 1 for grep { $_ } @author_emails;

    return scalar keys %seen;
}
134+
135+
# Median of a list of numbers.
#
# Takes the values as a flat argument list. Returns 0 for an empty list, the
# middle value for an odd count, and the mean of the two middle values for an
# even count. Values are ordered numerically, not as strings.
sub get_median {
    return 0 unless @_;

    my @ordered = sort { $a <=> $b } @_;
    my $count   = scalar @ordered;
    my $upper   = int($count / 2);

    # Odd count: a single middle element exists.
    return $ordered[$upper] if $count % 2;

    # Even count: average the two elements around the midpoint.
    return ($ordered[$upper - 1] + $ordered[$upper]) / 2;
}
147+
148+
# Arithmetic mean of a list of numbers.
#
# Takes the values as a flat argument list. Returns 0 for an empty list so that
# callers never divide by zero.
sub get_average {
    my $count = scalar @_;
    return 0 if $count == 0;

    my $total = 0;
    foreach my $value (@_) {
        $total += $value;
    }

    return $total / $count;
}
157+
158+
# Check that a string is a real calendar date written as YYYY-MM-DD.
#
# Argument:
#   $date - the string to check; undef is accepted and treated as invalid
#
# Returns 1 when $date is exactly four ASCII digits, "-", two digits, "-", two
# digits, AND the month is 01-12 and the day exists in that month (leap years
# included). Returns 0 otherwise.
sub validate_date {
    my $date = shift;
    return 0 unless defined $date;

    # \A and \z (rather than ^ and $) reject a trailing newline, and [0-9]
    # (rather than \d) rejects non-ASCII digits.
    return 0 unless $date =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})\z/;
    my ($year, $month, $day) = ($1, $2, $3);

    return 0 if $month < 1 || $month > 12;

    my @days_in_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    my $max_day = $days_in_month[$month - 1];

    # Gregorian leap-year rule: every 4th year, except centuries not
    # divisible by 400.
    my $is_leap = ($year % 4 == 0 && $year % 100 != 0) || $year % 400 == 0;
    $max_day = 29 if $month == 2 && $is_leap;

    return ($day >= 1 && $day <= $max_day) ? 1 : 0;
}
163+
164+
# Default reporting range: year to date.
#
# Returns the list ($since, $until), where $since is January 1st of the current
# local year and $until is today's local date, both as YYYY-MM-DD.
sub get_default_dates {
    # Read the clock once so both values describe the same instant; two
    # separate reads could straddle midnight on New Year's Eve and pair last
    # year's January 1st with this year's date.
    my @now = localtime;

    my $since = strftime("%Y-01-01", @now);
    my $until = strftime("%Y-%m-%d", @now);

    return ($since, $until);
}
171+
172+
# Decide where the report goes and, unless it goes to the terminal, reopen
# STDOUT onto the chosen file so that plain "print" calls write the report.
#
# Arguments:
#   $output_file - '-' to keep writing to the existing STDOUT; any other true
#                  value is used as the file name; undef or another false
#                  value selects the default name below
#   $since, $until, $prefix - used only to build the default file name
#                  "tmp/$prefix-$since-to-$until.md"
#
# Returns the list ($use_stdout, $final_output_file); the file name is undef
# when $use_stdout is 1. Dies when the tmp directory cannot be created or the
# file cannot be opened for writing.
sub setup_output {
    my ($output_file, $since, $until, $prefix) = @_;

    my $final_output_file;
    my $use_stdout = 0;

    # The defined() guard matters: callers that want the default file name pass
    # undef, and comparing undef with eq would emit an "uninitialized" warning.
    if (defined $output_file && $output_file eq '-') {
        # Use stdout
        $use_stdout = 1;
        $final_output_file = undef;
    } elsif ($output_file) {
        # User specified a file
        $final_output_file = $output_file;
    } else {
        # Default: save to tmp/ with date-based filename
        unless (-d 'tmp') {
            # Re-check -d after a failed mkdir in case the directory was
            # created by someone else in the meantime.
            mkdir 'tmp' or -d 'tmp'
                or die "ERROR: Cannot create directory tmp: $!\n";
        }
        $final_output_file = "tmp/$prefix-$since-to-$until.md";
    }

    # Redirect output if writing to file
    if (!$use_stdout) {
        open(STDOUT, '>', $final_output_file) or die "ERROR: Cannot write to $final_output_file: $!\n";
    }

    return ($use_stdout, $final_output_file);
}
199+
200+
# Finish the report: close the file that STDOUT was redirected to and announce
# its location on STDERR.
#
# Arguments:
#   $use_stdout        - when true, nothing is done
#   $final_output_file - file name used in the messages
#
# Dies when closing STDOUT fails, because buffered write errors (for example a
# full disk) only surface at close time and the report would be incomplete.
sub finish_output {
    my ($use_stdout, $final_output_file) = @_;

    if (!$use_stdout) {
        close(STDOUT)
            or die "ERROR: Cannot finish writing $final_output_file: $!\n";
        print STDERR "Report saved to $final_output_file\n";
    }

    return;
}
209+
210+
1;
211+

0 commit comments

Comments
 (0)