processor/history_hotspots.go GO 389 lines View on github.com → Search inside
1// SPDX-License-Identifier: MIT23package processor45import (6	"encoding/csv"7	"fmt"8	"os"9	"slices"10	"strings"1112	jsoniter "github.com/json-iterator/go"13	glanguage "golang.org/x/text/language"14	gmessage "golang.org/x/text/message"15)1617// HotspotsTopN is the cap on rows shown in the tabular hotspots report.18// CSV / JSON output is not capped.19const HotspotsTopN = 202021// runHotspotsReport is the dispatch entry point called from Process() when22// --hotspots is set. Opens the repo at repoPath, walks history, and writes23// the chosen format to stdout or FileOutput.24func runHotspotsReport(repoPath string) error {25	observer := newHotspotsObserver()26	if _, err := runHistory(repoPath, observer); err != nil {27		return err28	}29	out, err := renderHotspots(observer)30	if err != nil {31		return err32	}33	if FileOutput == "" {34		fmt.Print(out)35	} else {36		if err := os.WriteFile(FileOutput, []byte(out), 0644); err != nil {37			return err38		}39		fmt.Println("results written to " + FileOutput)40	}41	return nil42}4344// hotspotsRecord is the per-file accumulator and the final row.45type hotspotsRecord struct {46	File         string47	Language     string48	Complexity   int6449	Commits      int50	LinesChanged int6451	Authors      map[authorID]struct{}52	CodeChurn    int6453	CommentChurn int6454	Score        float6455}5657// hotspotsObserver accumulates per-file commit / churn / author stats during58// the walk, then materialises the table at Finalise using the HEAD snapshot59// for current language and complexity. Implements MailmapObserver so the60// Authrs column folds identities the same way the author rollup does,61// without paying for the full baseline tree classification.62type hotspotsObserver struct {63	files    map[string]*hotspotsRecord64	registry *authorRegistry65	window   HistoryWindow66	snapshot HeadSnapshot67	records  []hotspotsRecord68	totalRaw int // total files seen across the window (for the "X of Y" footer)69}7071func newHotspotsObserver() *hotspotsObserver {72	return &hotspotsObserver{73		files:    map[string]*hotspotsRecord{},74		registry: newAuthorRegistry(nil),75	}76}7778// SetMailmap satisfies MailmapObserver — rebuilds the registry with the79// repo's .mailmap so Authrs folds identities the same way the author rollup80// does.81func (o *hotspotsObserver) SetMailmap(mm *mailmap) {82	o.registry = newAuthorRegistry(mm)83}8485func (o *hotspotsObserver) Observe(c CommitInfo, changes []FileChange) {86	aid := o.registry.intern(c.Author, c.Email)87	for _, fc := range changes {88		// Rename: migrate the old path's accumulator so churn history is89		// continuous across the rename.90		if fc.FromPath != "" && fc.FromPath != fc.Path {91			if old, ok := o.files[fc.FromPath]; ok {92				old.File = fc.Path93				o.files[fc.Path] = old94				delete(o.files, fc.FromPath)95			}96		}97		rec := o.files[fc.Path]98		if rec == nil {99			rec = &hotspotsRecord{100				File:    fc.Path,101				Authors: map[authorID]struct{}{},102			}103			o.files[fc.Path] = rec104		}105		rec.Commits++106		added := countRangeLines(fc.AddedRanges)107		removed := countRangeLines(fc.RemovedRanges)108		rec.LinesChanged += int64(added + removed)109		rec.Authors[aid] = struct{}{}110111		code, comment := splitChurnByType(fc.AddedRanges, fc.LineTypes)112		rec.CodeChurn += int64(code)113		rec.CommentChurn += int64(comment)114	}115}116117func (o *hotspotsObserver) Finalise(window HistoryWindow, head HeadSnapshot) {118	o.window = window119	o.snapshot = head120	o.totalRaw = 0121122	for path, rec := range o.files {123		hf, alive := head.Files[path]124		if !alive {125			continue126		}127		rec.Language = hf.Language128		rec.Complexity = hf.Complexity129		o.totalRaw++130131		score := float64(rec.Complexity) * float64(rec.Commits)132		rec.Score = score133	}134135	records := make([]hotspotsRecord, 0, o.totalRaw)136	for path, rec := range o.files {137		if _, alive := head.Files[path]; !alive {138			continue139		}140		records = append(records, *rec)141	}142143	// Normalise 0–100 across the surviving set.144	maxScore := 0.0145	for _, r := range records {146		if r.Score > maxScore {147			maxScore = r.Score148		}149	}150	for i := range records {151		if maxScore > 0 {152			records[i].Score = records[i].Score / maxScore * 100.0153		}154	}155156	slices.SortFunc(records, func(a, b hotspotsRecord) int {157		if a.Score == b.Score {158			return strings.Compare(a.File, b.File)159		}160		if a.Score < b.Score {161			return 1162		}163		return -1164	})165	o.records = records166}167168// countRangeLines sums the line counts across a slice of line ranges.169func countRangeLines(ranges []LineRange) int {170	total := 0171	for _, r := range ranges {172		total += r.Count173	}174	return total175}176177// splitChurnByType classifies *added* lines only into code vs comment buckets178// using the per-line LineType vector for the new blob. Removed lines aren't179// classified — the old blob isn't fetched on the churn path. Reported as180// +Code% in the tabular output. Blank lines don't count toward either bucket.181func splitChurnByType(added []LineRange, lineTypes []LineType) (code, comment int) {182	for _, r := range added {183		for i := 0; i < r.Count; i++ {184			ln := r.Start - 1 + i // 0-based index into lineTypes185			if ln < 0 || ln >= len(lineTypes) {186				continue187			}188			switch lineTypes[ln] {189			case LINE_CODE:190				code++191			case LINE_COMMENT:192				comment++193			}194		}195	}196	return197}198199// renderHotspots returns the formatted output for the chosen --format.200func renderHotspots(o *hotspotsObserver) (string, error) {201	switch strings.ToLower(Format) {202	case "", "tabular", "wide":203		return renderHotspotsTabular(o), nil204	case "csv":205		return renderHotspotsCSV(o)206	case "json":207		return renderHotspotsJSON(o)208	default:209		return "", fmt.Errorf("unsupported --format %q for --hotspots (supported: tabular, csv, json)", Format)210	}211}212213// Tabular column formats.214//215//	%-27s %8s %7s %8s %8s %7s %8s216//	27 + 1 + 8 + 1 + 7 + 1 + 8 + 1 + 8 + 1 + 7 + 1 + 8 = 79217var tabularShortHotspotsFormatHead = "%-27s %8s %7s %8s %8s %7s %8s\n"218var tabularShortHotspotsFormatBody = "%-27s %8s %7d %8d %8s %7d %8.1f\n"219220// Wide variant — 109 columns, adds a 9-char hotspot bar and a +Code% column221// (%-share of *added* lines that were code; removed lines aren't classified).222//223//	%-27s %8s %7s %8s %8s %7s %8s %7s %11s224//	27 + 1 + 8 + 1 + 7 + 1 + 8 + 1 + 8 + 1 + 7 + 1 + 8 + 1 + 7 + 1 + 11 = 98225var tabularWideHotspotsFormatHead = "%-27s %8s %7s %8s %8s %7s %8s %7s %-11s\n"226var tabularWideHotspotsFormatBody = "%-27s %8s %7d %8d %8s %7d %8.1f %6.1f%% %-11s\n"227228func renderHotspotsTabular(o *hotspotsObserver) string {229	wide := More || strings.EqualFold(Format, "wide")230	brk := tabularBreakFor(wide)231232	var sb strings.Builder233	sb.WriteString(historyHeader("Hotspots", o.window, wide))234235	printer := gmessage.NewPrinter(glanguage.Make(os.Getenv("LANG")))236	if wide {237		_, _ = fmt.Fprintf(&sb, tabularWideHotspotsFormatHead,238			"File", "Lang", "Cmplx", "Commits", "Lines±", "Authrs", "Hotspot", "+Code%", "Bar")239	} else {240		_, _ = fmt.Fprintf(&sb, tabularShortHotspotsFormatHead,241			"File", "Lang", "Cmplx", "Commits", "Lines±", "Authrs", "Hotspot")242	}243	sb.WriteString(brk)244245	limit := min(len(o.records), HotspotsTopN)246247	for i := range limit {248		r := o.records[i]249		fileCol := unicodeAwareTrim(r.File, 26)250		fileCol = unicodeAwareRightPad(fileCol, 27)251		langCol := trimLanguageShort(r.Language, 8)252		linesCol := formatWithCommas(printer, r.LinesChanged)253		if wide {254			codeShare := 0.0255			totalChurn := r.CodeChurn + r.CommentChurn256			if totalChurn > 0 {257				codeShare = float64(r.CodeChurn) / float64(totalChurn) * 100.0258			}259			bar := renderBar(r.Score/100.0, 11)260			_, _ = fmt.Fprintf(&sb, tabularWideHotspotsFormatBody,261				fileCol, langCol, r.Complexity, r.Commits, linesCol,262				len(r.Authors), r.Score, codeShare, bar)263		} else {264			_, _ = fmt.Fprintf(&sb, tabularShortHotspotsFormatBody,265				fileCol, langCol, r.Complexity, r.Commits, linesCol,266				len(r.Authors), r.Score)267		}268	}269270	sb.WriteString(brk)271	if limit > 0 {272		footer := fmt.Sprintf("   complexity × change-frequency, normalised · %d of %d files shown",273			limit, o.totalRaw)274		sb.WriteString(footer)275		sb.WriteByte('\n')276		sb.WriteString(brk)277	}278	return sb.String()279}280281func formatWithCommas(p *gmessage.Printer, n int64) string {282	return p.Sprintf("%d", n)283}284285func trimLanguageShort(lang string, size int) string {286	if len(lang) <= size {287		return lang288	}289	// keep most informative bit290	return lang[:size-1] + "…"291}292293func renderHotspotsCSV(o *hotspotsObserver) (string, error) {294	var sb strings.Builder295	sb.WriteString(formatWindowComment(o.window))296	sb.WriteByte('\n')297298	w := csv.NewWriter(&sb)299	_ = w.Write([]string{300		"File", "Language", "Complexity", "Commits",301		"LinesChanged", "Authors", "CodeChurn", "CommentChurn", "Score",302	})303304	for _, r := range o.records {305		if r.Score <= 0 {306			continue307		}308		_ = w.Write([]string{309			r.File,310			r.Language,311			fmt.Sprintf("%d", r.Complexity),312			fmt.Sprintf("%d", r.Commits),313			fmt.Sprintf("%d", r.LinesChanged),314			fmt.Sprintf("%d", len(r.Authors)),315			fmt.Sprintf("%d", r.CodeChurn),316			fmt.Sprintf("%d", r.CommentChurn),317			fmt.Sprintf("%.1f", r.Score),318		})319	}320	w.Flush()321	if err := w.Error(); err != nil {322		return "", err323	}324	return sb.String(), nil325}326327type hotspotsJSONFile struct {328	File         string  `json:"file"`329	Language     string  `json:"language"`330	Complexity   int64   `json:"complexity"`331	Commits      int     `json:"commits"`332	LinesChanged int64   `json:"linesChanged"`333	Authors      int     `json:"authors"`334	CodeChurn    int64   `json:"codeChurn"`335	CommentChurn int64   `json:"commentChurn"`336	Score        float64 `json:"score"`337}338339type hotspotsJSONWindow struct {340	Depth   int    `json:"depth"`341	Commits int    `json:"commits"`342	From    string `json:"from"`343	To      string `json:"to"`344}345346type hotspotsJSONDoc struct {347	Report string             `json:"report"`348	Window hotspotsJSONWindow `json:"window"`349	Files  []hotspotsJSONFile `json:"files"`350}351352func renderHotspotsJSON(o *hotspotsObserver) (string, error) {353	doc := hotspotsJSONDoc{354		Report: "hotspots",355		Window: hotspotsJSONWindow{356			Depth:   o.window.Depth,357			Commits: o.window.Commits,358			From:    formatWindowDate(o.window.From),359			To:      formatWindowDate(o.window.To),360		},361		Files: make([]hotspotsJSONFile, 0, len(o.records)),362	}363	for _, r := range o.records {364		if r.Score <= 0 {365			continue366		}367		doc.Files = append(doc.Files, hotspotsJSONFile{368			File:         r.File,369			Language:     r.Language,370			Complexity:   r.Complexity,371			Commits:      r.Commits,372			LinesChanged: r.LinesChanged,373			Authors:      len(r.Authors),374			CodeChurn:    r.CodeChurn,375			CommentChurn: r.CommentChurn,376			Score:        round1(r.Score),377		})378	}379	b, err := jsoniter.Marshal(doc)380	if err != nil {381		return "", err382	}383	return string(b), nil384}385386func round1(f float64) float64 {387	return float64(int64(f*10+0.5)) / 10388}

Code quality findings 11

Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = fmt.Fprintf(&sb, tabularWideHotspotsFormatHead,
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = fmt.Fprintf(&sb, tabularShortHotspotsFormatHead,
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = fmt.Fprintf(&sb, tabularWideHotspotsFormatBody,
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = fmt.Fprintf(&sb, tabularShortHotspotsFormatBody,
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_ = w.Write([]string{
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_ = w.Write([]string{
Unstructured output; use a structured logging library (e.g., slog, zap, zerolog, logrus)
info correctness fmt-println
fmt.Println("results written to " + FileOutput)
Deeply nested control structures reduce readability; consider extracting to functions or using early returns
info maintainability deep-nesting
if old, ok := o.files[fc.FromPath]; ok {
Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies
info performance copy-large-struct
for path, rec := range o.files {
Range over slice copies each element by value; use index or pointer receiver for large structs to avoid copies
info performance copy-large-struct
for path, rec := range o.files {
Multiple appends without pre-allocation; use make() with capacity when size is known
info performance append-without-prealloc
doc.Files = append(doc.Files, hotspotsJSONFile{

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.