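Finding: map variable flagged as declared without initialization; writing to a nil map causes a panic, so a nil map must be initialized with make() before use. Note that the flagged declaration uses the composite literal `map[string]bool{}`, which already yields an empty, non-nil map that is safe to write to; `make(map[string]bool)` is an equivalent, more explicit spelling.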
// ReportSkipNames is the parsed, lower-cased set of section names supplied
// via --report-skip. It starts out as an empty, non-nil map, so entries can
// be added without any further initialization.
var ReportSkipNames = make(map[string]bool)
// SPDX-License-Identifier: MIT

package processor

import (
	"fmt"
	"html/template"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/boyter/gocodewalker"
	"github.com/go-git/go-git/v5"
)

// DefaultReportName is the file name used when --report is invoked without
// a path (pflag's NoOptDefVal). main.go wires this in as the bare-flag
// default; runReport compares ReportOut to it to decide whether the user
// supplied an explicit path or relied on the default.
const DefaultReportName = "scc-report.html"

// ReportOut is the output path supplied via --report. Empty means report
// mode is off; any other value (including DefaultReportName when the user
// passed a bare `--report`) flips Process() into the HTML-report branch.
var ReportOut = ""

// ReportSkip is the raw comma-separated value supplied via --report-skip.
// Process() parses it into ReportSkipNames before the report runs.
var ReportSkip = ""

// ReportSkipNames is the parsed, lower-cased set of section names supplied
// via --report-skip. Wired from main.go (spec 05). CollectReportData reads
// this through ReportSkipped to decide which optional *Result pointers to
// leave nil in the ReportData it returns.
//
// The composite literal yields an empty, non-nil map, so it is safe to
// write to without a separate make() call.
var ReportSkipNames = map[string]bool{}

// ReportTitle is the override for the repo name used in the report banner
// (spec 05). Empty means "auto-detect".
var ReportTitle = ""

// reportSkipRecognised is the set of section names --report-skip accepts.
// Kept here (next to ReportSkipped) so future template authors can find the
// authoritative list in one place. Names must match what the report template
// and CollectReportData branch on.
// Spec 05 fixes this set.
//
// NOTE(review): nothing in the code visible here reads this map; presumably
// it is consulted where --report-skip is parsed — confirm.
var reportSkipRecognised = map[string]bool{
	"cocomo":     true,
	"locomo":     true,
	"hotspots":   true,
	"authors":    true,
	"timeline":   true,
	"files":      true,
	"uloc":       true,
	"linelength": true,
	"card":       true,
}

// ReportSkipped reports whether the given section name was listed in
// --report-skip. Section names are case-insensitive — callers can pass
// either case. The lookup lower-cases section before indexing
// ReportSkipNames, so the keys stored there are expected to be lower-case.
func ReportSkipped(section string) bool {
	// Nothing skipped: answer without lower-casing the name.
	if len(ReportSkipNames) == 0 {
		return false
	}
	return ReportSkipNames[strings.ToLower(section)]
}

// Totals captures the headline numbers shown in the report's Overview strip.
// Mirrors the sums computed by the tabular formatter (sumFiles / sumLines /
// …) but pulled into a struct so the template can read them by name.
type Totals struct {
	Files      int64
	Lines      int64
	Code       int64
	Comment    int64
	Blank      int64
	Complexity int64
	Bytes      int64
}

// ULOCResult is the unique-lines-of-code rollup. Maps are converted to a
// stable slice here so the template can range deterministically.
type ULOCResult struct {
	Global      int            // number of distinct lines across all languages
	PerLanguage []ULOCLanguage // per-language counts, see ULOCLanguage for ordering
	TotalLines  int64          // total line count the Dryness ratio is computed against
	Dryness     float64        // Global / TotalLines; zero when TotalLines is zero
}

// ULOCLanguage is one row of the per-language ULOC slice. Sorted by ULOC
// descending, then name ascending.
type ULOCLanguage struct {
	Language string
	ULOC     int
}

// LineLengthBucket is one bar in the line-length histogram. Edges are
// inclusive-left, exclusive-right except for the open-ended tail bucket.
type LineLengthBucket struct {
	Start int // inclusive
	End   int // exclusive; 0 means "no upper bound" (the tail bucket)
	Count int64
	Label string // e.g. "0–20", "120+"
}

// LineLengthOutlier is one entry in the longest-lines callout list.
type LineLengthOutlier struct {
	File       string
	Language   string
	LineLength int
}

// LineLengthResult is the line-length histogram and summary statistics.
type LineLengthResult struct {
	Buckets    []LineLengthBucket
	Mean       float64
	Max        int
	Outliers   []LineLengthOutlier
	TotalLines int64
}

// HotspotsResult mirrors the data the tabular hotspot formatter consumes.
// Records is already sorted by Score desc.
type HotspotsResult struct {
	Window    HistoryWindow
	Records   []HotspotRow
	TotalRaw  int
	Available bool
}

// HotspotRow is one row of the hotspots table. Pulled out so report consumers
// don't depend on the private hotspotsRecord type.
type HotspotRow struct {
	File         string
	Language     string
	Complexity   int64
	Commits      int
	LinesChanged int64
	Authors      int // number of authors, not their identities
	CodeChurn    int64
	CommentChurn int64
	Score        float64
}

// AuthorsResult mirrors the data the authors tabular formatter consumes. The
// Sentinel pseudo-row (`(before window)`) is included in Rows; consumers
// filter or call it out separately.
type AuthorsResult struct {
	Window       HistoryWindow
	Rows         []AuthorRow
	BusFactor    int
	BusAuthors   []string
	BusCovered   float64
	InWindowCode int64
}

// AuthorRow is one row of the authors rollup table.
// Mirrors authorRow but
// public for template consumers.
type AuthorRow struct {
	Name            string
	Email           string
	Code            int64
	Comment         int64
	Complexity      int64
	Files           int
	OwnsPercent     float64
	InWindowPercent float64
	LastCommit      time.Time
	Sentinel        bool // marks the pseudo-row rather than a real author
}

// LangTimelineResult mirrors the language-timeline observer output.
type LangTimelineResult struct {
	Window  HistoryWindow
	Bucket  Bucketing
	Rows    []LangTimelineRow
	Buckets int // bucket count; filled from Bucket.N by the converter in this file
}

// LangTimelineRow is one row of the language timeline table.
type LangTimelineRow struct {
	Language      string
	StartingLines int64
	CodeNow       int64
	Change        int64
	SharePercent  float64
	Deltas        []int64
	Trajectory    []int64
}

// AuthorTimelineResult mirrors the author-timeline observer output.
type AuthorTimelineResult struct {
	Window  HistoryWindow
	Bucket  Bucketing
	Rows    []AuthorTimelineRow
	Buckets int // bucket count; filled from Bucket.N by the converter in this file
}

// AuthorTimelineRow is one row of the author timeline table.
type AuthorTimelineRow struct {
	Name         string
	Email        string
	TotalCommits int
	CodeDelta    int64
	Series       []AuthorTimelineBucket // one entry per bucket of the observer's series
}

// AuthorTimelineBucket is one bucket of an author's timeline series.
type AuthorTimelineBucket struct {
	Commits   int
	CodeDelta int64
}

// ReportData is the in-memory aggregate produced by CollectReportData.
// The
// HTML template consumes one of these values per report run.
type ReportData struct {
	// Metadata
	RepoName     string
	GeneratedAt  time.Time
	SccVersion   string
	Duration     time.Duration
	GitAvailable bool

	// Default rollup (always present)
	Summary []LanguageSummary
	Totals  Totals

	// Optional analyses — nil/empty if skipped or unavailable.
	ULOC             *ULOCResult
	LineLength       *LineLengthResult
	Hotspots         *HotspotsResult
	Authors          *AuthorsResult
	LanguageTimeline *LangTimelineResult
	AuthorTimeline   *AuthorTimelineResult
	Files            []*FileJob

	// Cost
	Cocomo *CocomoResult
	Locomo *LocomoResult

	// Rendered share-card SVG (data: URL safe). Populated by RenderReport
	// before the main template runs so it can be embedded as og:image.
	CardSVG template.HTML
}

// reportFlagState snapshots the package-level flag vars CollectReportData
// flips on entry so they can be restored on exit.
//
// scc's analysis modes (ULOC, line-length, per-file table) are gated by
// process-wide globals. The report mode flips them on inside a single
// invocation; we snapshot and restore via defer so panics, errors, or
// in-process re-entrancy don't leak state into a later scc call.
type reportFlagState struct {
	UlocMode bool
	MaxMean  bool
	Files    bool
}

// saveReportFlags captures the current values of the UlocMode, MaxMean and
// Files package-level flags.
func saveReportFlags() reportFlagState {
	return reportFlagState{
		UlocMode: UlocMode,
		MaxMean:  MaxMean,
		Files:    Files,
	}
}

// restore writes the snapshotted values back to the UlocMode, MaxMean and
// Files package-level flags.
func (s reportFlagState) restore() {
	UlocMode = s.UlocMode
	MaxMean = s.MaxMean
	Files = s.Files
}

// CollectReportData orchestrates the full scc analysis surface for one
// report. It walks the tree once for default counts, runs the git-history
// observers (when git is available), computes cost estimates, and returns a
// ReportData ready for HTML templating.
//
// IMPORTANT: this function mutates the package-level analysis flags
// (UlocMode, MaxMean, Files) while it runs.
The previous values are284// snapshotted and restored via defer, but callers should not assume the285// flags retain their on-entry values during the call.286func CollectReportData(path string) (ReportData, error) {287 start := time.Now()288289 saved := saveReportFlags()290 defer saved.restore()291292 if !ReportSkipped("uloc") {293 UlocMode = true294 }295 if !ReportSkipped("linelength") {296 MaxMean = true297 }298 if !ReportSkipped("files") {299 Files = true300 }301302 // Reset the package-level ULOC accumulators so repeated in-process303 // invocations don't see stale data from an earlier walk.304 ulocMutex.Lock()305 ulocGlobalCount = map[string]struct{}{}306 ulocLanguageCount = map[string]map[string]struct{}{}307 ulocMutex.Unlock()308309 gitAvailable := detectGit(path)310311 data := ReportData{312 GeneratedAt: time.Now().UTC(),313 SccVersion: Version,314 GitAvailable: gitAvailable,315 RepoName: detectRepoName(path),316 }317318 files, summary, totals, err := walkAndAggregate(path)319 if err != nil {320 return ReportData{}, err321 }322 data.Files = files323 data.Summary = summary324 data.Totals = totals325326 if !ReportSkipped("uloc") {327 data.ULOC = snapshotULOC(totals.Lines)328 }329330 if !ReportSkipped("linelength") {331 data.LineLength = bucketLineLengths(files)332 }333334 if gitAvailable {335 if !ReportSkipped("hotspots") {336 obs := newHotspotsObserver()337 if window, err := runHistory(path, obs); err == nil {338 data.Hotspots = hotspotsResultFromObserver(obs, window)339 } else {340 printWarnF("report: hotspots observer failed: %s", err)341 }342 }343 if !ReportSkipped("authors") {344 obs := newHistoryAuthorsObserver()345 if window, err := runHistory(path, obs); err == nil {346 data.Authors = authorsResultFromObserver(obs, window)347 } else {348 printWarnF("report: authors observer failed: %s", err)349 }350 }351 if !ReportSkipped("timeline") {352 lObs := newHistoryLanguagesObserver(HistoryBuckets)353 if window, err := runHistory(path, lObs); err == nil {354 
data.LanguageTimeline = languageTimelineResultFromObserver(lObs, window)355 } else {356 printWarnF("report: language timeline observer failed: %s", err)357 }358 aObs := newHistoryAuthorTimelineObserver(HistoryBuckets)359 if window, err := runHistory(path, aObs); err == nil {360 data.AuthorTimeline = authorTimelineResultFromObserver(aObs, window)361 } else {362 printWarnF("report: author timeline observer failed: %s", err)363 }364 }365 }366367 if !Cocomo && !ReportSkipped("cocomo") {368 c := computeCocomo(totals.Code)369 data.Cocomo = &c370 }371 if !ReportSkipped("locomo") {372 l := computeLocomo(totals.Code, totals.Complexity)373 data.Locomo = &l374 }375376 data.Duration = time.Since(start)377 return data, nil378}379380// detectGit returns true if the path (or any parent) contains a git working381// directory. Uses go-git's PlainOpenWithOptions with DetectDotGit so callers382// can pass a subdirectory of a repo. Cached behaviour is not needed here —383// this is called once at the start of CollectReportData.384func detectGit(path string) bool {385 _, err := git.PlainOpenWithOptions(path, &git.PlainOpenOptions{DetectDotGit: true})386 return err == nil387}388389// detectRepoName implements the resolution chain from spec 05:390// 1. ReportTitle (set from --report-title) if non-empty.391// 2. Last path segment of `git config --get remote.origin.url` (strip `.git`).392// 3. Basename of the analysed path.393// 4. "scc report" fallback.394func detectRepoName(path string) string {395 if ReportTitle != "" {396 return ReportTitle397 }398 if name := remoteOriginName(path); name != "" {399 return name400 }401 abs, err := filepath.Abs(path)402 if err == nil && abs != "" {403 base := filepath.Base(abs)404 if base != "" && base != "." && base != string(filepath.Separator) {405 return base406 }407 }408 return "scc report"409}410411// remoteOriginName runs `git config --get remote.origin.url` inside path and412// returns the last segment of the URL with a trailing `.git` stripped. 
// Empty
// when git is unavailable, the command fails, or the remote isn't set.
func remoteOriginName(path string) string {
	cmd := exec.Command("git", "config", "--get", "remote.origin.url")
	cmd.Dir = path
	out, err := cmd.Output()
	if err != nil {
		return ""
	}
	url := strings.TrimSpace(string(out))
	if url == "" {
		return ""
	}
	// Drop a single trailing slash so the last segment is not empty.
	url = strings.TrimSuffix(url, "/")
	// Keep only what follows the last "/" and then the last ":". This is
	// what discards transport prefixes such as https:// or git@host:.
	for _, sep := range []string{"/", ":"} {
		if idx := strings.LastIndex(url, sep); idx >= 0 {
			url = url[idx+1:]
		}
	}
	url = strings.TrimSuffix(url, ".git")
	return url
}

// walkAndAggregate runs scc's standard file walker against path, drains the
// resulting FileJob channel once, and tees the results into the language
// rollup and a flat per-file slice. Reuses aggregateLanguageSummary by
// feeding it the same FileJobs through a buffered channel.
//
// An empty path is treated as ".". path may name a directory (walked) or a
// single file (processed directly). The returned files slice is sorted by
// Location. The only error returned is for a path that cannot be stat'ed.
func walkAndAggregate(path string) ([]*FileJob, []LanguageSummary, Totals, error) {
	if path == "" {
		path = "."
	}

	fpath := filepath.Clean(path)
	info, err := os.Stat(fpath)
	if err != nil {
		// NOTE(review): the os.Stat error itself is not included in the
		// returned error; consider wrapping it with %w if callers do not
		// depend on the exact message.
		return nil, nil, Totals{}, fmt.Errorf("file or directory could not be read: %s", fpath)
	}

	dirPaths := []string{}
	filePaths := []string{}
	if info.IsDir() {
		dirPaths = append(dirPaths, fpath)
	} else {
		filePaths = append(filePaths, fpath)
	}

	ctx := processorContext{remap: newRemapConfig(RemapAll, RemapUnknown)}

	// Pipeline stages: walker -> potentialFilesQueue -> fileListQueue ->
	// fileProcessorWorker -> fileSummaryJobQueue -> tee (below).
	potentialFilesQueue := make(chan *gocodewalker.File, FileListQueueSize)
	fileListQueue := make(chan *FileJob, FileListQueueSize)
	fileSummaryJobQueue := make(chan *FileJob, FileSummaryJobQueueSize)

	if len(dirPaths) > 0 {
		fileWalker := gocodewalker.NewParallelFileWalker(dirPaths, potentialFilesQueue)
		// Walk errors are printed; the handler always returns true.
		fileWalker.SetErrorHandler(func(e error) bool {
			printError(e.Error())
			return true
		})
		fileWalker.IgnoreGitIgnore = GitIgnore
		fileWalker.IgnoreIgnoreFile = Ignore
		fileWalker.IgnoreGitModules = GitModuleIgnore
		fileWalker.IncludeHidden = true
		fileWalker.ExcludeDirectory = PathDenyList
		fileWalker.SetConcurrency(DirectoryWalkerJobWorkers)
		if !SccIgnore {
			fileWalker.CustomIgnore = []string{".sccignore"}
		}

		// Each Exclude pattern that compiles is applied three ways: to file
		// names, to directory names, and (below) to the full location. A
		// pattern that fails to compile is reported and skipped.
		var excludePathRegexes []*regexp.Regexp
		for _, exclude := range Exclude {
			re, err := regexp.Compile(exclude)
			if err == nil {
				fileWalker.ExcludeFilenameRegex = append(fileWalker.ExcludeFilenameRegex, re)
				fileWalker.ExcludeDirectoryRegex = append(fileWalker.ExcludeDirectoryRegex, re)
				excludePathRegexes = append(excludePathRegexes, re)
			} else {
				printError(err.Error())
			}
		}

		go func() {
			if err := fileWalker.Start(); err != nil {
				printError(err.Error())
			}
		}()

		// Turn walker hits into FileJobs. This goroutine is the only sender
		// on fileListQueue in this branch, so it closes the channel.
		// NOTE(review): the loop ends only when potentialFilesQueue is
		// closed — presumably by the walker when Start finishes; confirm.
		go func() {
			for fi := range potentialFilesQueue {
				shouldExclude := false
				for _, re := range excludePathRegexes {
					if re.MatchString(fi.Location) {
						shouldExclude = true
						break
					}
				}
				if shouldExclude {
					continue
				}
				// Entries that cannot be lstat'ed are skipped silently.
				fileInfo, err := os.Lstat(fi.Location)
				if err != nil {
					continue
				}
				if !fileInfo.IsDir() {
					if job := newFileJob(fi.Location, fi.Filename, fileInfo); job != nil {
						fileListQueue <- job
					}
				}
			}
			close(fileListQueue)
		}()
	} else {
		// Single-file input: no walker; the path is passed as both the
		// location and the filename argument of newFileJob.
		go func() {
			for _, f := range filePaths {
				fileInfo, err := os.Lstat(f)
				if err != nil {
					continue
				}
				if job := newFileJob(f, f, fileInfo); job != nil {
					fileListQueue <- job
				}
			}
			close(fileListQueue)
		}()
	}

	go ctx.fileProcessorWorker(fileListQueue, fileSummaryJobQueue)

	// Tee: as each FileJob arrives, append to the flat slice and forward to
	// a buffered channel that aggregateLanguageSummary drains. We forward
	// synchronously so totals/files always see the same set.
	aggregateInput := make(chan *FileJob, FileSummaryJobQueueSize)
	var (
		files  []*FileJob
		totals Totals
		mu     sync.Mutex
	)

	go func() {
		for job := range fileSummaryJobQueue {
			mu.Lock()
			files = append(files, job)
			totals.Files++
			totals.Lines += job.Lines
			totals.Code += job.Code
			totals.Comment += job.Comment
			totals.Blank += job.Blank
			totals.Complexity += job.Complexity
			totals.Bytes += job.Bytes
			mu.Unlock()
			aggregateInput <- job
		}
		close(aggregateInput)
	}()

	// NOTE(review): files and totals are read below without holding mu. That
	// is only safe if aggregateLanguageSummary returns after aggregateInput
	// has been closed (i.e. it drains the channel to completion) — confirm.
	summary := aggregateLanguageSummary(aggregateInput)
	summary = sortLanguageSummary(summary)

	// Ensure deterministic ordering of the flat Files slice — the worker
	// pool can interleave file emissions.
	sort.Slice(files, func(i, j int) bool {
		return files[i].Location < files[j].Location
	})

	return files, summary, totals, nil
}

// snapshotULOC converts the package-level ULOC maps into a sorted slice so
// the template can range deterministically.
totalLines drives the DRYness582// number — unique lines / total lines, capped at 1.0.583func snapshotULOC(totalLines int64) *ULOCResult {584 ulocMutex.Lock()585 defer ulocMutex.Unlock()586587 res := &ULOCResult{588 Global: len(ulocGlobalCount),589 TotalLines: totalLines,590 }591 if totalLines > 0 {592 res.Dryness = float64(res.Global) / float64(totalLines)593 }594595 res.PerLanguage = make([]ULOCLanguage, 0, len(ulocLanguageCount))596 for lang, set := range ulocLanguageCount {597 res.PerLanguage = append(res.PerLanguage, ULOCLanguage{Language: lang, ULOC: len(set)})598 }599 sort.Slice(res.PerLanguage, func(i, j int) bool {600 if res.PerLanguage[i].ULOC != res.PerLanguage[j].ULOC {601 return res.PerLanguage[i].ULOC > res.PerLanguage[j].ULOC602 }603 return res.PerLanguage[i].Language < res.PerLanguage[j].Language604 })605606 return res607}608609// lineLengthBucketEdges defines the histogram bins used in the report — six610// 20-wide bins plus an open-ended tail.611var lineLengthBucketEdges = []struct {612 start, end int613 label string614}{615 {0, 20, "0–20"},616 {20, 40, "20–40"},617 {40, 60, "40–60"},618 {60, 80, "60–80"},619 {80, 100, "80–100"},620 {100, 120, "100–120"},621 {120, 0, "120+"},622}623624// lineLengthOutlierCount is the maximum number of longest-line outliers625// surfaced in the report. The tabular formatter only shows top-N; the626// HTML report has more vertical room so we collect a slightly larger set.627const lineLengthOutlierCount = 10628629// bucketLineLengths walks every file's per-line lengths into the histogram630// buckets and tracks mean / max / longest-N outliers. Returns nil if no file631// had per-line length data (e.g. 
MaxMean was off everywhere).632func bucketLineLengths(files []*FileJob) *LineLengthResult {633 res := &LineLengthResult{}634 res.Buckets = make([]LineLengthBucket, len(lineLengthBucketEdges))635 for i, e := range lineLengthBucketEdges {636 res.Buckets[i] = LineLengthBucket{Start: e.start, End: e.end, Label: e.label}637 }638639 type outlier struct {640 file, lang string641 length int642 }643 var (644 total int64645 count int64646 maxLength int647 outliers []outlier648 )649650 for _, fj := range files {651 fileMax := 0652 for _, ll := range fj.LineLength {653 count++654 total += int64(ll)655 if ll > maxLength {656 maxLength = ll657 }658 if ll > fileMax {659 fileMax = ll660 }661 for i, edge := range lineLengthBucketEdges {662 if edge.end == 0 {663 if ll >= edge.start {664 res.Buckets[i].Count++665 break666 }667 } else if ll >= edge.start && ll < edge.end {668 res.Buckets[i].Count++669 break670 }671 }672 }673 if fileMax > 0 {674 outliers = append(outliers, outlier{675 file: fj.Location,676 lang: fj.Language,677 length: fileMax,678 })679 }680 }681682 if count == 0 {683 return nil684 }685686 res.TotalLines = count687 res.Mean = float64(total) / float64(count)688 res.Max = maxLength689690 sort.Slice(outliers, func(i, j int) bool {691 if outliers[i].length != outliers[j].length {692 return outliers[i].length > outliers[j].length693 }694 return outliers[i].file < outliers[j].file695 })696 if len(outliers) > lineLengthOutlierCount {697 outliers = outliers[:lineLengthOutlierCount]698 }699 res.Outliers = make([]LineLengthOutlier, 0, len(outliers))700 for _, o := range outliers {701 res.Outliers = append(res.Outliers, LineLengthOutlier{702 File: o.file,703 Language: o.lang,704 LineLength: o.length,705 })706 }707 return res708}709710func hotspotsResultFromObserver(o *hotspotsObserver, window HistoryWindow) *HotspotsResult {711 res := &HotspotsResult{712 Window: window,713 TotalRaw: o.totalRaw,714 Available: true,715 }716 res.Records = make([]HotspotRow, 0, len(o.records))717 for 
_, r := range o.records {718 res.Records = append(res.Records, HotspotRow{719 File: r.File,720 Language: r.Language,721 Complexity: r.Complexity,722 Commits: r.Commits,723 LinesChanged: r.LinesChanged,724 Authors: len(r.Authors),725 CodeChurn: r.CodeChurn,726 CommentChurn: r.CommentChurn,727 Score: r.Score,728 })729 }730 return res731}732733func authorsResultFromObserver(o *historyAuthorsObserver, window HistoryWindow) *AuthorsResult {734 res := &AuthorsResult{735 Window: window,736 BusFactor: o.busFactor,737 BusAuthors: append([]string(nil), o.busAuthors...),738 BusCovered: o.busCovered,739 InWindowCode: o.inWindowCode,740 }741 res.Rows = make([]AuthorRow, 0, len(o.rows))742 for _, r := range o.rows {743 res.Rows = append(res.Rows, AuthorRow{744 Name: r.Name,745 Email: r.Email,746 Code: r.Code,747 Comment: r.Comment,748 Complexity: r.Complexity,749 Files: r.Files,750 OwnsPercent: r.OwnsPercent,751 InWindowPercent: r.InWindowPercent,752 LastCommit: r.LastCommit,753 Sentinel: r.Sentinel,754 })755 }756 return res757}758759func languageTimelineResultFromObserver(o *historyLanguagesObserver, window HistoryWindow) *LangTimelineResult {760 res := &LangTimelineResult{761 Window: window,762 Bucket: o.bucket,763 Buckets: o.bucket.N,764 }765 res.Rows = make([]LangTimelineRow, 0, len(o.rows))766 for _, r := range o.rows {767 row := LangTimelineRow{768 Language: r.Language,769 StartingLines: r.StartingLines,770 CodeNow: r.CodeNow,771 Change: r.Change,772 SharePercent: r.SharePercent,773 Deltas: append([]int64(nil), r.Deltas...),774 Trajectory: append([]int64(nil), r.Trajectory...),775 }776 res.Rows = append(res.Rows, row)777 }778 return res779}780781func authorTimelineResultFromObserver(o *historyAuthorTimelineObserver, window HistoryWindow) *AuthorTimelineResult {782 res := &AuthorTimelineResult{783 Window: window,784 Bucket: o.bucket,785 Buckets: o.bucket.N,786 }787 res.Rows = make([]AuthorTimelineRow, 0, len(o.rows))788 for _, r := range o.rows {789 row := 
AuthorTimelineRow{790 Name: r.Name,791 Email: r.Email,792 TotalCommits: r.TotalCommits,793 CodeDelta: r.CodeDelta,794 Series: make([]AuthorTimelineBucket, len(r.Series)),795 }796 for i, b := range r.Series {797 row.Series[i] = AuthorTimelineBucket{798 Commits: b.Commits,799 CodeDelta: b.CodeDelta,800 }801 }802 res.Rows = append(res.Rows, row)803 }804 return res805}
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.