processor/history_author_timeline.go · boyter/scc

1// SPDX-License-Identifier: MIT23package processor45import (6	"encoding/csv"7	"fmt"8	"os"9	"slices"10	"strings"11	"time"1213	jsoniter "github.com/json-iterator/go"14	glanguage "golang.org/x/text/language"15	gmessage "golang.org/x/text/message"16)1718// authorTimelineSparkCells is the fixed width of the Activity sparkline cell19// in the 79-column tabular report. The fixed-resolution per-bucket series is20// downsampled to this many cells.21const authorTimelineSparkCells = 242223// authorTimelineTopN caps tabular rows. CSV/JSON are uncapped.24const authorTimelineTopN = 152526// authorTimelineBucket is one bucket's worth of accumulator state for one27// author.28type authorTimelineBucket struct {29	Commits   int30	CodeDelta int6431}3233// authorTimelineRow is the materialised per-author result.34type authorTimelineRow struct {35	Name         string36	Email        string37	TotalCommits int38	CodeDelta    int6439	Series       []authorTimelineBucket40}4142// authorTimelineEvent records one observed commit's contribution so the43// observer can bin it under the real window's Bucketing in Finalise (the44// engine doesn't expose the window until after the walk).45type authorTimelineEvent struct {46	Author    authorID47	When      time.Time48	CodeDelta int6449}5051// historyAuthorTimelineObserver collects per-commit events during the walk52// and bins them into per-(author, bucket) totals on Finalise. It implements53// MailmapObserver to pick up the mailmap without paying for the full54// start-tree classification — the report tracks deltas, not last-toucher55// attribution, so per-line baseline state is not needed.56type historyAuthorTimelineObserver struct {57	registry    *authorRegistry58	events      []authorTimelineEvent59	bucketCount int6061	bucket Bucketing62	window HistoryWindow63	rows   []authorTimelineRow64}6566func newHistoryAuthorTimelineObserver(buckets int) *historyAuthorTimelineObserver {67	if buckets <= 0 {68		buckets = 6069	}70	return &historyAuthorTimelineObserver{71		registry:    newAuthorRegistry(nil),72		bucketCount: buckets,73	}74}7576// SetMailmap satisfies MailmapObserver — rebuilds the registry with the77// repo's .mailmap so the timeline folds identities the same way the author78// rollup does.79func (o *historyAuthorTimelineObserver) SetMailmap(mm *mailmap) {80	o.registry = newAuthorRegistry(mm)81}8283func (o *historyAuthorTimelineObserver) Observe(c CommitInfo, changes []FileChange) {84	aid := o.registry.intern(c.Author, c.Email)8586	var delta int6487	for _, fc := range changes {88		added := splitAddedCodeLines(fc.AddedRanges, fc.LineTypes)89		removed := splitRemovedCodeLines(fc.RemovedRanges, fc.RemovedLineTypes)90		delta += int64(added) - int64(removed)91	}92	o.events = append(o.events, authorTimelineEvent{93		Author:    aid,94		When:      c.When,95		CodeDelta: delta,96	})97}9899func (o *historyAuthorTimelineObserver) Finalise(window HistoryWindow, head HeadSnapshot) {100	o.window = window101	o.bucket = NewBucketing(window.From, window.To, o.bucketCount)102103	series := map[authorID][]authorTimelineBucket{}104	for _, ev := range o.events {105		s := series[ev.Author]106		if s == nil {107			s = make([]authorTimelineBucket, o.bucket.N)108			series[ev.Author] = s109		}110		idx := o.bucket.Index(ev.When)111		s[idx].Commits++112		s[idx].CodeDelta += ev.CodeDelta113	}114115	rows := make([]authorTimelineRow, 0, len(series))116	for aid, s := range series {117		if aid == sentinelAuthorID {118			continue119		}120		rec := o.registry.record(aid)121		row := authorTimelineRow{122			Name:   rec.Name,123			Email:  rec.Email,124			Series: s,125		}126		for _, b := range s {127			row.TotalCommits += b.Commits128			row.CodeDelta += b.CodeDelta129		}130		rows = append(rows, row)131	}132133	slices.SortFunc(rows, func(a, b authorTimelineRow) int {134		if a.TotalCommits != b.TotalCommits {135			if a.TotalCommits < b.TotalCommits {136				return 1137			}138			return -1139		}140		return strings.Compare(a.Name, b.Name)141	})142	o.rows = rows143}144145// splitAddedCodeLines returns the count of added lines classified as code by146// the new blob's LineTypes vector. Mirrors splitChurnByType but only returns147// the code component.148func splitAddedCodeLines(added []LineRange, lineTypes []LineType) int {149	code := 0150	for _, r := range added {151		for i := 0; i < r.Count; i++ {152			ln := r.Start - 1 + i153			if ln < 0 || ln >= len(lineTypes) {154				continue155			}156			if lineTypes[ln] == LINE_CODE {157				code++158			}159		}160	}161	return code162}163164// splitRemovedCodeLines counts removed lines classified as code by the OLD165// blob's LineTypes vector. RemovedRanges are in old-blob (pre-diff) line166// coordinates, so they index removedLineTypes directly. Removed-side mirror167// of splitAddedCodeLines — together they yield a symmetric code-only delta.168// Returns 0 when the old blob could not be classified.169func splitRemovedCodeLines(removed []LineRange, removedLineTypes []LineType) int {170	code := 0171	for _, r := range removed {172		for i := 0; i < r.Count; i++ {173			ln := r.Start - 1 + i174			if ln < 0 || ln >= len(removedLineTypes) {175				continue176			}177			if removedLineTypes[ln] == LINE_CODE {178				code++179			}180		}181	}182	return code183}184185// runAuthorTimelineReport is the dispatch entry point called from Process()186// when --by-author --timeline is set. Opens the repo, walks the window with187// the configured bucket count, and writes the chosen format.188func runAuthorTimelineReport(repoPath string) error {189	observer := newHistoryAuthorTimelineObserver(HistoryBuckets)190	if _, err := runHistory(repoPath, observer); err != nil {191		return err192	}193	out, err := renderAuthorTimeline(observer)194	if err != nil {195		return err196	}197	if FileOutput == "" {198		fmt.Print(out)199	} else {200		if err := os.WriteFile(FileOutput, []byte(out), 0644); err != nil {201			return err202		}203		fmt.Println("results written to " + FileOutput)204	}205	return nil206}207208func renderAuthorTimeline(o *historyAuthorTimelineObserver) (string, error) {209	switch strings.ToLower(Format) {210	case "", "tabular", "wide":211		return renderAuthorTimelineTabular(o), nil212	case "csv":213		return renderAuthorTimelineCSV(o)214	case "json":215		return renderAuthorTimelineJSON(o)216	default:217		return "", fmt.Errorf("unsupported --format %q for --by-author --timeline (supported: tabular, csv, json)", Format)218	}219}220221// Tabular column format. 24+1+24+1+8+1+9+1+10 = 79.222var tabularShortAuthorTimelineFormatHead = "%-24s %-24s %8s %9s %-10s\n"223224func renderAuthorTimelineTabular(o *historyAuthorTimelineObserver) string {225	wide := More || strings.EqualFold(Format, "wide")226	brk := tabularBreakFor(wide)227228	var sb strings.Builder229	sb.WriteString(historyHeader("Authors", o.window, wide))230231	p := gmessage.NewPrinter(glanguage.Make(os.Getenv("LANG")))232233	_, _ = fmt.Fprintf(&sb, tabularShortAuthorTimelineFormatHead,234		"Author", "Activity", "Commits", "Code±", "")235	sb.WriteString(brk)236237	limit := min(len(o.rows), authorTimelineTopN)238239	for i := range limit {240		r := o.rows[i]241		nameCol := unicodeAwareTrim(r.Name, 23)242		nameCol = unicodeAwareRightPad(nameCol, 24)243		spark := renderAuthorTimelineSparkline(r.Series, authorTimelineSparkCells)244		tag := authorTimelineTag(r.Series, o.bucket.Width)245		commitsStr := formatWithCommas(p, int64(r.TotalCommits))246		codeStr := formatCodeDelta(p, r.CodeDelta)247		_, _ = fmt.Fprintf(&sb, tabularShortAuthorTimelineFormatHead,248			nameCol, spark, commitsStr, codeStr, tag)249	}250251	sb.WriteString(brk)252	return sb.String()253}254255// renderAuthorTimelineSparkline projects per-bucket commit counts to a256// sparkline using the shared helper from history_render.go.257func renderAuthorTimelineSparkline(series []authorTimelineBucket, cells int) string {258	if len(series) == 0 {259		if asciiOutput() {260			return strings.Repeat(".", cells)261		}262		return strings.Repeat("▁", cells)263	}264	values := make([]float64, len(series))265	for i, b := range series {266		values[i] = float64(b.Commits)267	}268	return renderSparkline(values, cells)269}270271// authorTimelineTag derives the trailing presentation tag. Returns:272//   - "↑"        — final bucket is >= 80% of the row's peak (still active).273//   - "quiet Nmo" — trailing zero buckets cover >= 1 month of wall clock.274//   - ""         — no notable trend.275//276// Tags are tabular-only; CSV/JSON do not carry them.277func authorTimelineTag(series []authorTimelineBucket, width time.Duration) string {278	if len(series) == 0 {279		return ""280	}281	maxCommits := 0282	for _, b := range series {283		if b.Commits > maxCommits {284			maxCommits = b.Commits285		}286	}287	if maxCommits == 0 {288		return ""289	}290291	last := series[len(series)-1].Commits292	if last > 0 && float64(last) >= 0.8*float64(maxCommits) {293		return "↑"294	}295296	zeroTail := 0297	for i := len(series) - 1; i >= 0; i-- {298		if series[i].Commits != 0 {299			break300		}301		zeroTail++302	}303	if zeroTail == 0 || width <= 0 {304		return ""305	}306	totalQuiet := time.Duration(zeroTail) * width307	const month = 30 * 24 * time.Hour308	if totalQuiet < month {309		return ""310	}311	months := max(int((totalQuiet+month/2)/month), 1)312	return fmt.Sprintf("quiet %dmo", months)313}314315// formatCodeDelta renders a signed code delta with a leading sign and316// thousands separators, e.g. "+38,000" or "-21".317func formatCodeDelta(p *gmessage.Printer, delta int64) string {318	if delta >= 0 {319		return "+" + formatWithCommas(p, delta)320	}321	return "-" + formatWithCommas(p, -delta)322}323324func renderAuthorTimelineCSV(o *historyAuthorTimelineObserver) (string, error) {325	var sb strings.Builder326	sb.WriteString(formatWindowComment(o.window))327	sb.WriteByte('\n')328	_, _ = fmt.Fprintf(&sb, "# buckets: %d\n", o.bucket.N)329330	w := csv.NewWriter(&sb)331	_ = w.Write([]string{"Author", "Email", "BucketStart", "Commits", "CodeDelta"})332333	for _, r := range o.rows {334		for i, b := range r.Series {335			bucketStart := o.bucket.Start(i).UTC().Format(historyDateLayout)336			_ = w.Write([]string{337				r.Name,338				r.Email,339				bucketStart,340				fmt.Sprintf("%d", b.Commits),341				fmt.Sprintf("%d", b.CodeDelta),342			})343		}344	}345	w.Flush()346	if err := w.Error(); err != nil {347		return "", err348	}349	return sb.String(), nil350}351352type authorTimelineJSONBucket struct {353	BucketStart string `json:"bucketStart"`354	Commits     int    `json:"commits"`355	CodeDelta   int64  `json:"codeDelta"`356}357358type authorTimelineJSONAuthor struct {359	Name         string                     `json:"name"`360	Email        string                     `json:"email"`361	TotalCommits int                        `json:"totalCommits"`362	CodeDelta    int64                      `json:"codeDelta"`363	Series       []authorTimelineJSONBucket `json:"series"`364}365366type authorTimelineJSONWindow struct {367	Depth   int    `json:"depth"`368	Commits int    `json:"commits"`369	From    string `json:"from"`370	To      string `json:"to"`371}372373type authorTimelineJSONDoc struct {374	Report  string                     `json:"report"`375	Window  authorTimelineJSONWindow   `json:"window"`376	Buckets int                        `json:"buckets"`377	Authors []authorTimelineJSONAuthor `json:"authors"`378}379380func renderAuthorTimelineJSON(o *historyAuthorTimelineObserver) (string, error) {381	doc := authorTimelineJSONDoc{382		Report: "author-timeline",383		Window: authorTimelineJSONWindow{384			Depth:   o.window.Depth,385			Commits: o.window.Commits,386			From:    formatWindowDate(o.window.From),387			To:      formatWindowDate(o.window.To),388		},389		Buckets: o.bucket.N,390		Authors: make([]authorTimelineJSONAuthor, 0, len(o.rows)),391	}392	for _, r := range o.rows {393		ja := authorTimelineJSONAuthor{394			Name:         r.Name,395			Email:        r.Email,396			TotalCommits: r.TotalCommits,397			CodeDelta:    r.CodeDelta,398			Series:       make([]authorTimelineJSONBucket, 0, len(r.Series)),399		}400		for i, b := range r.Series {401			ja.Series = append(ja.Series, authorTimelineJSONBucket{402				BucketStart: o.bucket.Start(i).UTC().Format(historyDateLayout),403				Commits:     b.Commits,404				CodeDelta:   b.CodeDelta,405			})406		}407		doc.Authors = append(doc.Authors, ja)408	}409	b, err := jsoniter.Marshal(doc)410	if err != nil {411		return "", err412	}413	return string(b), nil414}

Code quality findings 12

Blank identifier discarding results; verify intentional ignoring of return values

L233

warning correctness blank-identifier-discard

_, _ = fmt.Fprintf(&sb, tabularShortAuthorTimelineFormatHead,

Blank identifier discarding results; verify intentional ignoring of return values

L247

warning correctness blank-identifier-discard

_, _ = fmt.Fprintf(&sb, tabularShortAuthorTimelineFormatHead,

Blank identifier discarding results; verify intentional ignoring of return values

L328

warning correctness blank-identifier-discard

_, _ = fmt.Fprintf(&sb, "# buckets: %d\n", o.bucket.N)

Blank identifier discarding results; verify intentional ignoring of return values

L331

warning correctness blank-identifier-discard

_ = w.Write([]string{"Author", "Email", "BucketStart", "Commits", "CodeDelta"})

Multiple appends without pre-allocation; use make() with capacity when size is known

L92

info performance append-without-prealloc

o.events = append(o.events, authorTimelineEvent{

Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies

L116

info performance copy-large-struct

for aid, s := range series {

Multiple appends without pre-allocation; use make() with capacity when size is known

L130

info performance append-without-prealloc

rows = append(rows, row)

Unstructured output; use a structured logging library (e.g., slog, zap, zerolog, logrus)

L203

info correctness fmt-println

fmt.Println("results written to " + FileOutput)

Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies

L265

info performance copy-large-struct

for i, b := range series {

Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies

L334

info performance copy-large-struct

for i, b := range r.Series {

Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies

L400

info performance copy-large-struct

for i, b := range r.Series {

Multiple appends without pre-allocation; use make() with capacity when size is known

L401

info performance append-without-prealloc

ja.Series = append(ja.Series, authorTimelineJSONBucket{

Code quality findings 12

Get this view in your editor