processor/history_authors.go GO 590 lines View on github.com → Search inside
1// SPDX-License-Identifier: MIT23package processor45import (6	"encoding/csv"7	"fmt"8	"os"9	"slices"10	"strings"11	"time"1213	jsoniter "github.com/json-iterator/go"14	"github.com/mattn/go-runewidth"15	glanguage "golang.org/x/text/language"16	gmessage "golang.org/x/text/message"17)1819// authorsTopN is the cap on tabular rows for the author rollup. CSV/JSON20// output is not capped. The remainder collapses into a single "others (N)"21// row in the tabular table.22const authorsTopN = 152324// authorNameColWidth / authorNameTrim are the trim/pad widths for the25// "Author" column in the 79-col tabular report. Both wide and short use the26// same name column.27const (28	authorNameTrim     = 3029	authorNameColWidth = 3130)3132// authorRow is one materialised row in the report. Sentinel is true for the33// "(before window)" pseudo-author whose lines pre-date the walk window.34type authorRow struct {35	Name            string36	Email           string37	Code            int6438	Comment         int6439	Complexity      int6440	Files           int41	OwnsPercent     float6442	InWindowPercent float6443	LastCommit      time.Time44	Sentinel        bool45}4647// historyAuthorsObserver accumulates per-file forward-replay blame during48// the walk, then collapses it into per-author totals on Finalise. It49// implements both CommitObserver and BaselineObserver, so the engine seeds50// it with the pre-window tree state (and the .mailmap) before the walk.51type historyAuthorsObserver struct {52	blame      map[string][]authorID53	lineTypes  map[string][]LineType54	complexity map[string][]int5556	registry *authorRegistry57	lastSeen map[authorID]time.Time5859	window   HistoryWindow60	snapshot HeadSnapshot6162	rows         []authorRow63	busFactor    int64	busAuthors   []string65	busCovered   float6466	inWindowCode int6467}6869func newHistoryAuthorsObserver() *historyAuthorsObserver {70	return &historyAuthorsObserver{71		blame:      map[string][]authorID{},72		lineTypes:  map[string][]LineType{},73		complexity: map[string][]int{},74		lastSeen:   map[authorID]time.Time{},75		registry:   newAuthorRegistry(nil),76	}77}7879// Seed installs the mailmap and seeds the per-file blame maps from the80// baseline snapshot — every pre-window line maps to sentinelAuthorID so81// surviving untouched lines are correctly attributed to "(before window)".82func (o *historyAuthorsObserver) Seed(baseline BaselineSnapshot) {83	o.registry = newAuthorRegistry(baseline.Mailmap)84	for path, bf := range baseline.Files {85		n := len(bf.LineTypes)86		if n == 0 {87			continue88		}89		o.blame[path] = make([]authorID, n) // zero value = sentinelAuthorID90		o.lineTypes[path] = bf.LineTypes91		o.complexity[path] = bf.Complexity92	}93}9495func (o *historyAuthorsObserver) Observe(c CommitInfo, changes []FileChange) {96	aid := o.registry.intern(c.Author, c.Email)97	if prev, ok := o.lastSeen[aid]; !ok || c.When.After(prev) {98		o.lastSeen[aid] = c.When99	}100	for _, fc := range changes {101		prev := o.blame[fc.Path]102		// Rename: carry the old path's per-line blame forward as the prior103		// state, then drop the stale key so it is not double-counted. A104		// pure rename has no Added/Removed ranges, so applyDiffToBlame just105		// copies the carried-forward blame — every line keeps its original106		// author. A rename with edits attributes only the edited lines to107		// the renaming commit.108		if fc.FromPath != "" && fc.FromPath != fc.Path {109			if oldBlame, ok := o.blame[fc.FromPath]; ok {110				prev = oldBlame111				delete(o.blame, fc.FromPath)112				delete(o.lineTypes, fc.FromPath)113				delete(o.complexity, fc.FromPath)114			}115		}116		newN := len(fc.LineTypes)117		o.blame[fc.Path] = applyDiffToBlame(prev, newN, fc.AddedRanges, fc.RemovedRanges, aid)118		o.lineTypes[fc.Path] = fc.LineTypes119		o.complexity[fc.Path] = fc.Complexity120	}121}122123func (o *historyAuthorsObserver) Finalise(window HistoryWindow, head HeadSnapshot) {124	o.window = window125	o.snapshot = head126127	type acc struct {128		Code       int64129		Comment    int64130		Complexity int64131		Files      int132	}133	totals := map[authorID]*acc{}134	var grandCode int64135136	for path, blame := range o.blame {137		if _, alive := head.Files[path]; !alive {138			continue139		}140		types := o.lineTypes[path]141		perFile := map[authorID]int64{}142143		for i := 0; i < len(blame) && i < len(types); i++ {144			aid := blame[i]145			a := totals[aid]146			if a == nil {147				a = &acc{}148				totals[aid] = a149			}150			switch types[i] {151			case LINE_CODE:152				a.Code++153				perFile[aid]++154				grandCode++155			case LINE_COMMENT:156				a.Comment++157			}158		}159		for _, lineNo := range o.complexity[path] {160			idx := lineNo - 1161			if idx < 0 || idx >= len(blame) {162				continue163			}164			aid := blame[idx]165			a := totals[aid]166			if a == nil {167				a = &acc{}168				totals[aid] = a169			}170			a.Complexity++171		}172173		// Plurality of code lines: who has the most code in this file. A174		// real author always outranks the sentinel — the sentinel only owns175		// the file when no real author has any code here. Tie-break on176		// smaller authorID for determinism.177		var plur authorID178		var plurCount int64179		for aid, c := range perFile {180			if aid == sentinelAuthorID {181				continue182			}183			if c > plurCount || (c == plurCount && aid < plur) {184				plur = aid185				plurCount = c186			}187		}188		if plurCount == 0 {189			// No real author has code here; fall back to the sentinel.190			if c, ok := perFile[sentinelAuthorID]; ok {191				plur = sentinelAuthorID192				plurCount = c193			}194		}195		if plurCount > 0 {196			totals[plur].Files++197		}198	}199200	var sentinelCode int64201	if s, ok := totals[sentinelAuthorID]; ok {202		sentinelCode = s.Code203	}204	inWindowCode := grandCode - sentinelCode205	o.inWindowCode = inWindowCode206207	rows := make([]authorRow, 0, len(totals))208	for aid, a := range totals {209		rec := o.registry.record(aid)210		row := authorRow{211			Name:       rec.Name,212			Email:      rec.Email,213			Code:       a.Code,214			Comment:    a.Comment,215			Complexity: a.Complexity,216			Files:      a.Files,217		}218		if grandCode > 0 {219			row.OwnsPercent = float64(a.Code) / float64(grandCode) * 100.0220		}221		if aid == sentinelAuthorID {222			row.Sentinel = true223		} else {224			if inWindowCode > 0 {225				row.InWindowPercent = float64(a.Code) / float64(inWindowCode) * 100.0226			}227			if when, ok := o.lastSeen[aid]; ok {228				row.LastCommit = when229			}230		}231		rows = append(rows, row)232	}233234	// Sentinel sorted to the end; real authors by Code desc, then Name.235	slices.SortFunc(rows, func(a, b authorRow) int {236		if a.Sentinel != b.Sentinel {237			if a.Sentinel {238				return 1239			}240			return -1241		}242		if a.Code != b.Code {243			if a.Code < b.Code {244				return 1245			}246			return -1247		}248		return strings.Compare(a.Name, b.Name)249	})250	o.rows = rows251252	cumPercent := 0.0253	for _, r := range rows {254		if r.Sentinel {255			continue256		}257		if r.Code == 0 {258			break259		}260		cumPercent += r.InWindowPercent261		o.busAuthors = append(o.busAuthors, r.Name)262		if cumPercent > 50 {263			break264		}265	}266	o.busFactor = len(o.busAuthors)267	o.busCovered = cumPercent268}269270// runAuthorsReport is the dispatch entry point called from Process() when271// --by-author is set (and --timeline is not). Opens the repo at repoPath,272// walks history with baseline seeding, and writes the chosen format to273// stdout or FileOutput.274func runAuthorsReport(repoPath string) error {275	observer := newHistoryAuthorsObserver()276	if _, err := runHistory(repoPath, observer); err != nil {277		return err278	}279	out, err := renderAuthors(observer)280	if err != nil {281		return err282	}283	if FileOutput == "" {284		fmt.Print(out)285	} else {286		if err := os.WriteFile(FileOutput, []byte(out), 0644); err != nil {287			return err288		}289		fmt.Println("results written to " + FileOutput)290	}291	return nil292}293294func renderAuthors(o *historyAuthorsObserver) (string, error) {295	switch strings.ToLower(Format) {296	case "", "tabular", "wide":297		return renderAuthorsTabular(o), nil298	case "csv":299		return renderAuthorsCSV(o)300	case "json":301		return renderAuthorsJSON(o)302	default:303		return "", fmt.Errorf("unsupported --format %q for --by-author (supported: tabular, csv, json)", Format)304	}305}306307// Short tabular: %-31s %9s %9s %7s %8s %10s = 79.308var tabularShortAuthorsFormatHead = "%-31s %9s %9s %7s %8s %10s\n"309310// Wide tabular: inserts the Comment column. %-31s %9s %9s %9s %7s %8s %10s = 88.311var tabularWideAuthorsFormatHead = "%-31s %9s %9s %9s %7s %8s %10s\n"312313func renderAuthorsTabular(o *historyAuthorsObserver) string {314	wide := More || strings.EqualFold(Format, "wide")315	brk := tabularBreakFor(wide)316317	var sb strings.Builder318	sb.WriteString(historyHeader("Authors", o.window, wide))319320	p := gmessage.NewPrinter(glanguage.Make(os.Getenv("LANG")))321322	if wide {323		_, _ = fmt.Fprintf(&sb, tabularWideAuthorsFormatHead,324			"Author", "Code", "Comment", "Cmplx", "Files", "Owns", "Last seen")325	} else {326		_, _ = fmt.Fprintf(&sb, tabularShortAuthorsFormatHead,327			"Author", "Code", "Cmplx", "Files", "Owns", "Last seen")328	}329	sb.WriteString(brk)330331	realRows := make([]authorRow, 0, len(o.rows))332	var sentinel *authorRow333	for i := range o.rows {334		r := o.rows[i]335		if r.Sentinel {336			s := r337			sentinel = &s338		} else {339			realRows = append(realRows, r)340		}341	}342343	limit := len(realRows)344	if limit > authorsTopN {345		limit = authorsTopN346	}347348	for i := 0; i < limit; i++ {349		r := realRows[i]350		writeAuthorRow(&sb, p, wide, r.Name, r.Code, r.Comment, r.Complexity,351			fmt.Sprintf("%d", r.Files), r.OwnsPercent, lastSeenString(r))352	}353354	if limit < len(realRows) {355		var (356			count      int357			code       int64358			comment    int64359			complexity int64360			owns       float64361		)362		for i := limit; i < len(realRows); i++ {363			r := realRows[i]364			count++365			code += r.Code366			comment += r.Comment367			complexity += r.Complexity368			owns += r.OwnsPercent369		}370		writeAuthorRow(&sb, p, wide,371			fmt.Sprintf("others (%d)", count), code, comment, complexity,372			"—", owns, "—")373	}374375	if sentinel != nil && (sentinel.Code+sentinel.Comment+sentinel.Complexity) > 0 {376		writeAuthorRow(&sb, p, wide,377			"(before window)", sentinel.Code, sentinel.Comment, sentinel.Complexity,378			fmt.Sprintf("%d", sentinel.Files), sentinel.OwnsPercent, "—")379	}380381	sb.WriteString(brk)382383	footerWidth := runewidth.StringWidth(strings.TrimRight(brk, "\n"))384	footer := formatAuthorsFooter(o, footerWidth)385	sb.WriteString(footer)386	sb.WriteByte('\n')387	sb.WriteString(brk)388389	return sb.String()390}391392func lastSeenString(r authorRow) string {393	if r.LastCommit.IsZero() {394		return "—"395	}396	return r.LastCommit.UTC().Format(historyDateLayout)397}398399func writeAuthorRow(sb *strings.Builder, p *gmessage.Printer, wide bool,400	name string, code, comment, complexity int64,401	files string, owns float64, lastSeen string) {402403	nameCol := unicodeAwareTrim(name, authorNameTrim)404	nameCol = unicodeAwareRightPad(nameCol, authorNameColWidth)405	codeStr := formatWithCommas(p, code)406	cmplxStr := formatWithCommas(p, complexity)407	ownsStr := fmt.Sprintf("%6.1f%%", owns)408409	if wide {410		commentStr := formatWithCommas(p, comment)411		_, _ = fmt.Fprintf(sb, tabularWideAuthorsFormatHead,412			nameCol, codeStr, commentStr, cmplxStr, files, ownsStr, lastSeen)413	} else {414		_, _ = fmt.Fprintf(sb, tabularShortAuthorsFormatHead,415			nameCol, codeStr, cmplxStr, files, ownsStr, lastSeen)416	}417}418419func formatAuthorsFooter(o *historyAuthorsObserver, width int) string {420	if o.inWindowCode == 0 {421		return "Bus factor 0 · no code touched in window"422	}423	if o.busFactor == 0 {424		return "Bus factor 0 · no authored code in window"425	}426	covered := o.busCovered427	if covered > 100 {428		covered = 100429	}430431	prefix := fmt.Sprintf("Bus factor %d · ", o.busFactor)432	var suffix string433	if o.busFactor == 1 {434		suffix = fmt.Sprintf(" last-touched %.0f%% of in-window code (single point of failure)", covered)435	} else {436		suffix = fmt.Sprintf(" last-touched %.0f%% of in-window code", covered)437	}438439	single := prefix + strings.Join(o.busAuthors, " + ") + suffix440	if width <= 0 || runewidth.StringWidth(single) <= width {441		return single442	}443	return wrapBusFactorFooter(prefix, o.busAuthors, suffix, width)444}445446// wrapBusFactorFooter word-wraps the bus-factor line on " + " token447// boundaries when it would otherwise exceed width. Continuation lines are448// indented to align under the first name so the structure stays readable.449// Width is measured in display columns (runewidth), not bytes, so non-ASCII450// author names and CI-mode ASCII breaks both produce the right wrap point.451func wrapBusFactorFooter(prefix string, names []string, suffix string, width int) string {452	indent := strings.Repeat(" ", runewidth.StringWidth(prefix))453	var sb strings.Builder454	line := prefix455	lineWidth := runewidth.StringWidth(line)456457	for i, name := range names {458		token := name459		if i < len(names)-1 {460			token += " + "461		}462		tokenWidth := runewidth.StringWidth(token)463		if line != prefix && line != indent && lineWidth+tokenWidth > width {464			sb.WriteString(strings.TrimRight(line, " "))465			sb.WriteByte('\n')466			line = indent467			lineWidth = runewidth.StringWidth(indent)468		}469		line += token470		lineWidth += tokenWidth471	}472473	suffixWidth := runewidth.StringWidth(suffix)474	if lineWidth+suffixWidth > width && line != indent {475		sb.WriteString(strings.TrimRight(line, " "))476		sb.WriteByte('\n')477		line = indent + strings.TrimLeft(suffix, " ")478	} else {479		line += suffix480	}481	sb.WriteString(line)482	return sb.String()483}484485func renderAuthorsCSV(o *historyAuthorsObserver) (string, error) {486	var sb strings.Builder487	sb.WriteString(formatWindowComment(o.window))488	sb.WriteByte('\n')489490	w := csv.NewWriter(&sb)491	_ = w.Write([]string{492		"Author", "Email", "Code", "Complexity", "Comment", "Files",493		"OwnsPercent", "LastCommit", "BeforeWindow",494	})495	for _, r := range o.rows {496		name, email := r.Name, r.Email497		lastCommit := ""498		beforeWindow := "false"499		if r.Sentinel {500			name, email = "", ""501			beforeWindow = "true"502		} else if !r.LastCommit.IsZero() {503			lastCommit = r.LastCommit.UTC().Format(historyDateLayout)504		}505		_ = w.Write([]string{506			name,507			email,508			fmt.Sprintf("%d", r.Code),509			fmt.Sprintf("%d", r.Complexity),510			fmt.Sprintf("%d", r.Comment),511			fmt.Sprintf("%d", r.Files),512			fmt.Sprintf("%.1f", r.OwnsPercent),513			lastCommit,514			beforeWindow,515		})516	}517	w.Flush()518	if err := w.Error(); err != nil {519		return "", err520	}521	return sb.String(), nil522}523524type authorsJSONAuthor struct {525	Name            *string `json:"name"`526	Email           *string `json:"email"`527	Code            int64   `json:"code"`528	Complexity      int64   `json:"complexity"`529	Comment         int64   `json:"comment"`530	Files           int     `json:"files"`531	OwnsPercent     float64 `json:"ownsPercent"`532	InWindowPercent float64 `json:"inWindowPercent"`533	LastCommit      string  `json:"lastCommit,omitempty"`534	BeforeWindow    bool    `json:"beforeWindow"`535}536537type authorsJSONWindow struct {538	Depth   int    `json:"depth"`539	Commits int    `json:"commits"`540	From    string `json:"from"`541	To      string `json:"to"`542}543544type authorsJSONDoc struct {545	Report    string              `json:"report"`546	Window    authorsJSONWindow   `json:"window"`547	BusFactor int                 `json:"busFactor"`548	Authors   []authorsJSONAuthor `json:"authors"`549}550551func renderAuthorsJSON(o *historyAuthorsObserver) (string, error) {552	doc := authorsJSONDoc{553		Report: "authors",554		Window: authorsJSONWindow{555			Depth:   o.window.Depth,556			Commits: o.window.Commits,557			From:    formatWindowDate(o.window.From),558			To:      formatWindowDate(o.window.To),559		},560		BusFactor: o.busFactor,561		Authors:   make([]authorsJSONAuthor, 0, len(o.rows)),562	}563	for _, r := range o.rows {564		a := authorsJSONAuthor{565			Code:            r.Code,566			Complexity:      r.Complexity,567			Comment:         r.Comment,568			Files:           r.Files,569			OwnsPercent:     round1(r.OwnsPercent),570			InWindowPercent: round1(r.InWindowPercent),571		}572		if r.Sentinel {573			a.BeforeWindow = true574		} else {575			name, email := r.Name, r.Email576			a.Name = &name577			a.Email = &email578			if !r.LastCommit.IsZero() {579				a.LastCommit = r.LastCommit.UTC().Format(historyDateLayout)580			}581		}582		doc.Authors = append(doc.Authors, a)583	}584	b, err := jsoniter.Marshal(doc)585	if err != nil {586		return "", err587	}588	return string(b), nil589}

Code quality findings 14

Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = fmt.Fprintf(&sb, tabularWideAuthorsFormatHead,
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = fmt.Fprintf(&sb, tabularShortAuthorsFormatHead,
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = fmt.Fprintf(sb, tabularWideAuthorsFormatHead,
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = fmt.Fprintf(sb, tabularShortAuthorsFormatHead,
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_ = w.Write([]string{
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_ = w.Write([]string{
Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies
info performance copy-large-struct
for path, bf := range baseline.Files {
Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies
info performance copy-large-struct
for path, blame := range o.blame {
Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies
info performance copy-large-struct
for aid, c := range perFile {
Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies
info performance copy-large-struct
for aid, a := range totals {
Multiple appends without pre-allocation; use make() with capacity when size is known
info performance append-without-prealloc
o.busAuthors = append(o.busAuthors, r.Name)
Unstructured output; use a structured logging library (e.g., slog, zap, zerolog, logrus)
info correctness fmt-println
fmt.Println("results written to " + FileOutput)
Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies
info performance copy-large-struct
for i, name := range names {
Multiple appends without pre-allocation; use make() with capacity when size is known
info performance append-without-prealloc
doc.Authors = append(doc.Authors, a)

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.