processor/history_blame.go GO 312 lines View on github.com → Search inside
1// SPDX-License-Identifier: MIT23package processor45import (6	"bufio"7	"bytes"8	"io"9	"strings"1011	"github.com/go-git/go-git/v5/plumbing/object"12)1314// authorID identifies an author after mailmap folding within a single15// observer instance. 0 is reserved for the "(before window)" sentinel that16// collects lines surviving from before the window's start commit.17type authorID uint321819const sentinelAuthorID authorID = 02021// authorRecord is the canonical (post-mailmap) identity stored against an22// authorID. The sentinel slot keeps zero values.23type authorRecord struct {24	Name  string25	Email string26}2728// authorRegistry interns (name, email) pairs into stable authorIDs after29// folding them through the mailmap, if one is set. It is keyed by canonical30// name+email so two commit identities mapped to the same canonical pair31// collapse to one authorID. When fold is true, a second index folds32// distinct emails that share the same (lowercase name, email domain) — a33// best-effort fallback for repos without a .mailmap.34type authorRegistry struct {35	nameToID     map[string]authorID36	byNameDomain map[string]authorID37	records      []authorRecord38	mm           *mailmap39	fold         bool40}4142func newAuthorRegistry(mm *mailmap) *authorRegistry {43	return newAuthorRegistryWithFold(mm, FoldAuthors)44}4546func newAuthorRegistryWithFold(mm *mailmap, fold bool) *authorRegistry {47	return &authorRegistry{48		nameToID:     map[string]authorID{},49		byNameDomain: map[string]authorID{},50		records:      []authorRecord{{}}, // slot 0 = sentinelAuthorID51		mm:           mm,52		fold:         fold,53	}54}5556// foldSkipNames are generic identity names common in default git configs.57// Two commits sharing one of these names plus a domain are very likely58// unrelated humans, so we refuse to fold them and let them stay split.59var foldSkipNames = map[string]struct{}{60	"":          {},61	"unknown":   {},62	"root":      {},63	"user":      {},64	"admin":     {},65	"dev":       {},66	"developer": {},67	"guest":     {},68	"nobody":    {},69	"none":      {},70}7172// emailDomain returns the lowercased substring after the last '@' in email,73// or "" when there is no '@'.74func emailDomain(email string) string {75	i := strings.LastIndexByte(email, '@')76	if i < 0 {77		return ""78	}79	return strings.ToLower(email[i+1:])80}8182func (r *authorRegistry) intern(name, email string) authorID {83	canonName, canonEmail := name, email84	if r.mm != nil {85		canonName, canonEmail = r.mm.Resolve(canonName, canonEmail)86	}87	key := canonName + "\x00" + canonEmail88	if id, ok := r.nameToID[key]; ok {89		return id90	}91	if r.fold {92		lowerName := strings.ToLower(canonName)93		if _, skip := foldSkipNames[lowerName]; !skip {94			domain := emailDomain(canonEmail)95			if domain != "" {96				ndKey := lowerName + "\x00" + domain97				if id, ok := r.byNameDomain[ndKey]; ok {98					r.nameToID[key] = id99					return id100				}101				id := authorID(len(r.records))102				r.records = append(r.records, authorRecord{Name: canonName, Email: canonEmail})103				r.nameToID[key] = id104				r.byNameDomain[ndKey] = id105				return id106			}107		}108	}109	id := authorID(len(r.records))110	r.records = append(r.records, authorRecord{Name: canonName, Email: canonEmail})111	r.nameToID[key] = id112	return id113}114115func (r *authorRegistry) record(id authorID) authorRecord {116	if int(id) >= len(r.records) {117		return authorRecord{}118	}119	return r.records[id]120}121122// applyDiffToBlame walks prev (the per-line author IDs from the previous123// commit) and the diff's added/removed line ranges in source order, returning124// the per-line author IDs for the new blob. Added lines are attributed to125// `commit`; equal lines copy from prev. The result is sized to newLines —126// truncated or sentinel-padded if the diff arithmetic disagrees with the127// classifier's line count (e.g. renames, trailing-newline differences).128func applyDiffToBlame(prev []authorID, newLines int, added, removed []LineRange, commit authorID) []authorID {129	out := make([]authorID, 0, newLines)130	oldPos := 1131	newPos := 1132	ai, ri := 0, 0133	oldN := len(prev)134135	for newPos <= newLines || oldPos <= oldN {136		if ri < len(removed) && oldPos == removed[ri].Start {137			oldPos += removed[ri].Count138			ri++139			continue140		}141		if ai < len(added) && newPos == added[ai].Start {142			for k := 0; k < added[ai].Count; k++ {143				out = append(out, commit)144			}145			newPos += added[ai].Count146			ai++147			continue148		}149		if oldPos <= oldN && newPos <= newLines {150			out = append(out, prev[oldPos-1])151			oldPos++152			newPos++153			continue154		}155		break156	}157158	if len(out) > newLines {159		out = out[:newLines]160	}161	for len(out) < newLines {162		out = append(out, sentinelAuthorID)163	}164	return out165}166167// mailmap is a parsed .mailmap file. The standard line forms are:168//169//	Proper Name <commit@email>170//	<proper@email> <commit@email>171//	Proper Name <proper@email> <commit@email>172//	Proper Name <proper@email> Commit Name <commit@email>173//174// Lookup is by commit email, optionally also by commit name. The replacement175// fields override Name and/or Email; absent fields leave that part of the176// commit identity unchanged.177type mailmap struct {178	byEmail        map[string]mailmapEntry179	byNameAndEmail map[string]mailmapEntry180}181182type mailmapEntry struct {183	Name  string184	Email string185}186187// Resolve returns the canonical (name, email) after applying the mailmap.188// A nil receiver is a no-op so callers can intern unmapped identities the189// same way.190func (m *mailmap) Resolve(name, email string) (string, string) {191	if m == nil {192		return name, email193	}194	lookupEmail := strings.ToLower(email)195	if e, ok := m.byNameAndEmail[name+"\x00"+lookupEmail]; ok {196		return overrideIdentity(name, email, e)197	}198	if e, ok := m.byEmail[lookupEmail]; ok {199		return overrideIdentity(name, email, e)200	}201	return name, email202}203204func overrideIdentity(name, email string, e mailmapEntry) (string, string) {205	outName, outEmail := name, email206	if e.Name != "" {207		outName = e.Name208	}209	if e.Email != "" {210		outEmail = e.Email211	}212	return outName, outEmail213}214215// parseMailmap parses a .mailmap blob into a mailmap. Comments (#) and blank216// lines are skipped; malformed lines are silently dropped.217func parseMailmap(blob []byte) *mailmap {218	m := &mailmap{219		byEmail:        map[string]mailmapEntry{},220		byNameAndEmail: map[string]mailmapEntry{},221	}222	scan := bufio.NewScanner(bytes.NewReader(blob))223	for scan.Scan() {224		line := scan.Text()225		if i := strings.IndexByte(line, '#'); i >= 0 {226			line = line[:i]227		}228		line = strings.TrimSpace(line)229		if line == "" {230			continue231		}232		parsed, ok := parseMailmapLine(line)233		if !ok {234			continue235		}236		entry := mailmapEntry{Name: parsed.properName, Email: parsed.properEmail}237		commitEmail := strings.ToLower(parsed.commitEmail)238		if parsed.commitName != "" {239			m.byNameAndEmail[parsed.commitName+"\x00"+commitEmail] = entry240		} else {241			m.byEmail[commitEmail] = entry242		}243	}244	return m245}246247type parsedMailmapLine struct {248	properName  string249	properEmail string250	commitName  string251	commitEmail string252}253254// parseMailmapLine pulls the <…> email brackets out of a line in source255// order. The last email is the commit (lookup) email. If there are two256// emails, the first is the proper (replacement) email and any free text257// between the two is the commit (lookup) name. Free text before the first258// email is the proper (replacement) name.259func parseMailmapLine(line string) (parsedMailmapLine, bool) {260	var out parsedMailmapLine261	type bracket struct{ start, end int }262	var brs []bracket263	for i := 0; i < len(line); i++ {264		if line[i] != '<' {265			continue266		}267		for j := i + 1; j < len(line); j++ {268			if line[j] == '>' {269				brs = append(brs, bracket{i, j})270				i = j271				break272			}273		}274	}275	if len(brs) == 0 {276		return out, false277	}278	if len(brs) == 1 {279		out.commitEmail = strings.TrimSpace(line[brs[0].start+1 : brs[0].end])280		out.properName = strings.TrimSpace(line[:brs[0].start])281		return out, out.commitEmail != ""282	}283	first, last := brs[0], brs[len(brs)-1]284	out.properEmail = strings.TrimSpace(line[first.start+1 : first.end])285	out.commitEmail = strings.TrimSpace(line[last.start+1 : last.end])286	out.properName = strings.TrimSpace(line[:first.start])287	out.commitName = strings.TrimSpace(line[first.end+1 : last.start])288	return out, out.commitEmail != ""289}290291// loadMailmapFromTree reads .mailmap from the given tree and parses it, or292// returns nil when the file is absent or unreadable.293func loadMailmapFromTree(tree *object.Tree) *mailmap {294	if tree == nil {295		return nil296	}297	f, err := tree.File(".mailmap")298	if err != nil {299		return nil300	}301	reader, err := f.Reader()302	if err != nil {303		return nil304	}305	defer reader.Close()306	blob, err := io.ReadAll(reader)307	if err != nil {308		return nil309	}310	return parseMailmap(blob)311}

Code quality findings 13

Declared map variable without initialization; writing to a nil map causes a panic. Use make() to initialize
warning correctness nil-map-write
var foldSkipNames = map[string]struct{}{
Ensure errors are handled or logged
warning correctness unhandled-error
if err != nil {
Ensure errors are handled or logged
warning correctness unhandled-error
if err != nil {
Ensure errors are handled or logged
warning correctness unhandled-error
if err != nil {
Deeply nested control structures reduce readability; consider extracting to functions or using early returns
info maintainability deep-nesting
if r.fold {
Multiple appends without pre-allocation; use make() with capacity when size is known
info performance append-without-prealloc
out = append(out, commit)
Multiple appends without pre-allocation; use make() with capacity when size is known
info performance append-without-prealloc
out = append(out, prev[oldPos-1])
Multiple appends without pre-allocation; use make() with capacity when size is known
info performance append-without-prealloc
out = append(out, sentinelAuthorID)
Deeply nested control structures reduce readability; consider extracting to functions or using early returns
info maintainability deep-nesting
for i := 0; i < len(line); i++ {
Deeply nested control structures reduce readability; consider extracting to functions or using early returns
info maintainability deep-nesting
if line[i] != '<' {
Deeply nested control structures reduce readability; consider extracting to functions or using early returns
info maintainability deep-nesting
for j := i + 1; j < len(line); j++ {
Deeply nested control structures reduce readability; consider extracting to functions or using early returns
info maintainability deep-nesting
if line[j] == '>' {
Multiple appends without pre-allocation; use make() with capacity when size is known
info performance append-without-prealloc
brs = append(brs, bracket{i, j})

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.