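Map variable flagged as declared without initialization: writing to a nil map causes a panic, so a map must be initialized with make() or a composite literal before use. Note that the `map[string][]string{}` literal in the declaration below already produces a non-nil, empty map.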
var ExtensionToLanguage = map[string][]string{}
// SPDX-License-Identifier: MIT

package processor

import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"runtime/debug"
	"slices"
	"strconv"
	"strings"
	"sync"

	"github.com/boyter/gocodewalker"
)

// Version indicates the version of the application.
var Version = "3.8.0 (beta)"

// Flags set via the CLI which control how the output is displayed.

// Files indicates if there should be file output or not when formatting.
var Files = false

// Languages indicates if the command line should print out the supported
// languages; when set, Process prints them and returns immediately.
var Languages = false

// Verbose enables verbose logging output.
var Verbose = false

// Debug enables debug logging output.
var Debug = false

// Trace enables trace logging output which is extremely verbose.
var Trace = false

// Duplicates enables duplicate file detection.
var Duplicates = false

// MinifiedGenerated enables minified/generated file detection. processFlags
// turns on both Minified and Generated when it is set.
var MinifiedGenerated = false

// IgnoreMinifiedGenerate ignores printing counts for minified/generated files.
// processFlags turns on MinifiedGenerated, IgnoreMinified and IgnoreGenerated
// when it is set.
var IgnoreMinifiedGenerate = false

// MinifiedGeneratedLineByteLength is the number of bytes per average line used
// to determine whether a file is minified/generated.
var MinifiedGeneratedLineByteLength = 255

// Minified enables minified file detection.
var Minified = false

// IgnoreMinified ignores printing counts for minified files. processFlags
// turns on Minified when it is set.
var IgnoreMinified = false

// Generated enables generated file detection.
var Generated = false

// GeneratedMarkers defines head markers for generated file detection.
var GeneratedMarkers []string

// IgnoreGenerated ignores printing counts for generated files. processFlags
// turns on Generated when it is set.
var IgnoreGenerated = false

// Complexity toggles complexity calculation. The flag is inverted: processFlags
// logs "Complexity Calculation" as !Complexity and processLanguageFeature only
// registers complexity tokens while it is false, so true means disabled.
var Complexity = false

// More enables wider output with more information in formatter.
var More = false

// Cocomo toggles the COCOMO calculation. The flag is inverted: processFlags
// logs it as !Cocomo and sets it to false to turn COCOMO on when
// CostComparison is set, so true means disabled.
var Cocomo = false

// SLOCCountFormat prints a more SLOCCount like COCOMO calculation.
var SLOCCountFormat = false

//
// CocomoProjectType allows the flipping between project types which impacts the calculation
var CocomoProjectType = "organic"

// Output, ignore-file and calculation switches populated from the CLI.
var (
	// Size toggles the Size calculation.
	Size = false

	// HBorder draws horizontal borders between sections.
	HBorder = false

	// SizeUnit determines what size calculation is used for megabytes.
	SizeUnit = "si"

	// Ci indicates a CI environment, in which box drawing characters are
	// disabled.
	Ci = false

	// GitIgnore disables .gitignore checks.
	GitIgnore = false

	// GitModuleIgnore disables .gitmodules checks.
	GitModuleIgnore = false

	// Ignore disables ignore file checks.
	Ignore = false

	// SccIgnore disables sccignore file checks.
	SccIgnore = false

	// CountIgnore controls whether ignore files themselves are counted.
	CountIgnore = false

	// DisableCheckBinary toggles checking for binary files using NUL bytes.
	DisableCheckBinary = false

	// UlocMode toggles the ULOC calculation. processFlags forces it on
	// whenever Dryness is set.
	UlocMode = false

	// Percent is a boolean output switch.
	// NOTE(review): its previous comment was a copy of DisableCheckBinary's
	// and its effect is not visible in this file; confirm against the
	// formatter before documenting further.
	Percent = false

	// MaxMean sets the calculation of the max and mean line length.
	MaxMean = false

	// Dryness toggles the dryness output; setting it makes processFlags enable
	// UlocMode as well.
	// NOTE(review): the previous comment was a copy of DisableCheckBinary's;
	// confirm the exact output against the formatter.
	Dryness = false

	// SortBy sets which column output in formatter should be sorted by.
	SortBy = ""

	// Exclude holds regular expressions used to exclude files from being
	// processed; Process compiles each entry and reports the ones that fail.
	Exclude = []string{}

	// CountAs is a rule for mapping known or new extensions to other rules.
	CountAs = ""

	// Format sets the output format of the formatter.
	Format = ""

	// FormatMulti is a rule for defining multiple output formats.
	FormatMulti = ""

	// SQLProject optionally stores the name used by the SQL insert formats.
	SQLProject = ""

	// RemapUnknown allows remapping of unknown files with a string to search
	// the content for.
	RemapUnknown = ""
)

// RemapAll allows remapping of
all files with a string to search the content for147var RemapAll = ""148149type remapRule struct {150 pattern []byte151 language string152}153154type remapConfig struct {155 all []remapRule156 unknown []remapRule157}158159type processorContext struct {160 remap remapConfig161}162163func parseRemapRules(value string) []remapRule {164 rules := []remapRule{}165166 for s := range strings.SplitSeq(value, ",") {167 t := strings.Split(s, ":")168 if len(t) == 2 {169 rules = append(rules, remapRule{170 pattern: []byte(t[0]),171 language: t[1],172 })173 }174 }175176 return rules177}178179func newRemapConfig(remapAll string, remapUnknown string) remapConfig {180 return remapConfig{181 all: parseRemapRules(remapAll),182 unknown: parseRemapRules(remapUnknown),183 }184}185186// MatchEngine selects how a CountRule pattern is interpreted. Glob is the187// default; regex is opt-in via the re: prefix.188type MatchEngine int189190const (191 // MatchGlob is the default. The pattern is a glob ('*' and '?') translated192 // to an anchored regex and matched as a full match against the path.193 MatchGlob MatchEngine = iota194 // MatchRegex treats the pattern as a raw (unanchored) RE2 regex. Opt in195 // with the re: prefix.196 MatchRegex197)198199// CountRule is the typed, library-facing form of a --count-as-pattern rule.200// It matches files by their path and relabels them to a new named category201// whose counting rules are cloned from an existing base language.202type CountRule struct {203 Engine MatchEngine // MatchGlob (the default) or MatchRegex204 Pattern string // glob or regex source205 Name string // new category display name206 BaseLanguage string // existing language whose counting rules are cloned207}208209// CountRules is the typed input set either directly by library users or by the210// CLI after parsing CountAsPattern. Setup happens in setupCountRules.211var CountRules []CountRule212213// CountAsPattern holds the raw repeatable --count-as-pattern flag values. 
// Each is parsed into a CountRule at setup. Library users may set CountRules directly.
var CountAsPattern []string

// compiledCountRule is the runtime form scanned by newFileJob: a compiled
// pattern together with the category name it relabels matches to.
type compiledCountRule struct {
	re   *regexp.Regexp
	name string
}

// compiledCountRules collects the rules that setupCountRules compiled.
var compiledCountRules []compiledCountRule

// Output destination, path filters, queue sizing and COCOMO inputs.
var (
	// CurrencySymbol allows setting the currency symbol for cocomo project
	// cost estimation.
	CurrencySymbol = ""

	// FileOutput sets the file that output should be written to; Process
	// prints to stdout while it is empty.
	FileOutput = ""

	// PathDenyList sets the paths that should be skipped.
	PathDenyList = []string{}

	// FileListQueueSize is the queue of files found and ready to be read into
	// memory.
	FileListQueueSize = runtime.NumCPU()

	// FileProcessJobWorkers is the number of workers that process the file
	// collecting stats.
	FileProcessJobWorkers = runtime.NumCPU() * 4

	// FileSummaryJobQueueSize is the queue used to hold processed file
	// statistics before formatting.
	FileSummaryJobQueueSize = runtime.NumCPU()

	// DirectoryWalkerJobWorkers is the number of workers which will walk the
	// directory tree.
	DirectoryWalkerJobWorkers = 8

	// AllowListExtensions is a list of extensions which are allowed to be
	// processed.
	AllowListExtensions = []string{}

	// ExcludeListExtensions is a list of extensions which should be ignored.
	ExcludeListExtensions = []string{}

	// ExcludeFilename is a list of filenames which should be ignored.
	ExcludeFilename = []string{}

	// AverageWage is the average wage in dollars used for the COCOMO cost
	// estimate.
	AverageWage int64 = 56286

	// Overhead is the overhead multiplier for corporate overhead (facilities,
	// equipment, accounting, etc.).
	Overhead float64 = 2.4
)

// EAF is the effort adjustment factor derived from the cost drivers, i.e.
// 1.0 if rated nominal
var EAF float64 = 1.0

// LOCOMO (LLM Output COst MOdel) settings.
var (
	// Locomo toggles the LOCOMO (LLM Output COst MOdel) calculation.
	Locomo = false

	// CostComparison enables both COCOMO and LOCOMO output for side-by-side
	// comparison.
	CostComparison = false

	// LocomoPresetName is the LLM model preset for pricing and throughput
	// defaults.
	LocomoPresetName = "medium"

	// LocomoInputPrice is the cost per 1M input tokens (overrides preset).
	LocomoInputPrice float64
	// LocomoInputPriceSet accompanies LocomoInputPrice.
	// NOTE(review): presumably records that the price was supplied
	// explicitly, mirroring LocomoCyclesSet; confirm against the flag wiring.
	LocomoInputPriceSet = false

	// LocomoOutputPrice is the cost per 1M output tokens (overrides preset).
	LocomoOutputPrice float64
	// LocomoOutputPriceSet accompanies LocomoOutputPrice.
	// NOTE(review): presumably records that the price was supplied
	// explicitly; confirm against the flag wiring.
	LocomoOutputPriceSet = false

	// LocomoTPS is the output tokens per second (overrides preset).
	LocomoTPS float64
	// LocomoTPSSet accompanies LocomoTPS.
	// NOTE(review): presumably records that the rate was supplied explicitly;
	// confirm against the flag wiring.
	LocomoTPSSet = false

	// LocomoReviewMinutesPerLine is the human review time per line of code in
	// minutes.
	LocomoReviewMinutesPerLine float64 = 0.01

	// LocomoConfig is the power-user config string
	// "tokensPerLine,baseInputPerLine,complexityWeight,iterations,iterationWeight".
	LocomoConfig = ""

	// LocomoTokensPerLine is the average number of output tokens per line of
	// code.
	LocomoTokensPerLine float64 = 10

	// LocomoBaseInputPerLine is the base number of input tokens per output
	// line.
	LocomoBaseInputPerLine float64 = 20

	// LocomoComplexityWeight is the scaling weight applied to
	// sqrt(complexity density) for input tokens.
	LocomoComplexityWeight float64 = 5

	// LocomoIterations is the base number of iteration/retry attempts.
	LocomoIterations float64 = 1.5

	// LocomoIterationWeight is the scaling weight for complexity-driven
	// retries.
	LocomoIterationWeight float64 = 2

	// LocomoCyclesOverride is the user-supplied iteration factor override
	// (--locomo-cycles).
	LocomoCyclesOverride float64

	// LocomoCyclesSet indicates whether --locomo-cycles was explicitly set.
	LocomoCyclesSet = false
)

// GcFileCount is the number of files to process before turning
// the GC back on
var GcFileCount = 10000

// Internal state driven by ConfigureGc and ConfigureLazy.
var (
	// gcPercent is handed to debug.SetGCPercent by ConfigureGc, which stores
	// the value that call returns back into it.
	gcPercent = -1
	// isLazy records the value passed to ConfigureLazy.
	isLazy = false
)

// Large-file handling and git-history report switches.
var (
	// NoLarge if set true will ignore files over a certain number of lines or
	// bytes.
	NoLarge = false

	// IncludeSymLinks if set true will count symlink files.
	IncludeSymLinks = false

	// LargeLineCount number of lines before being counted as a large file
	// based on https://github.com/pinpt/ripsrc/blob/master/ripsrc/fileinfo/fileinfo.go#L44
	LargeLineCount int64 = 40000

	// LargeByteCount number of bytes before being counted as a large file
	// based on https://github.com/pinpt/ripsrc/blob/master/ripsrc/fileinfo/fileinfo.go#L44
	LargeByteCount int64 = 1000000

	// Hotspots toggles the hotspots git-history report.
	Hotspots = false

	// ByAuthor toggles the author-rollup git-history report.
	ByAuthor = false

	// Timeline selects an over-time view. With ByAuthor, runs the author
	// timeline report (plan 04); alone, runs the languages-over-time report
	// (plan 05). With Hotspots set, the combination errors out.
	Timeline = false

	// HistoryBuckets is the time-bucket resolution for the timeline reports.
	// Wired to --buckets in main.go; default 60.
	HistoryBuckets = 60
)

// FoldAuthors enables the name+domain identity folding fallback applied
// after the mailmap.
Toggled off via --no-fold-authors.346var FoldAuthors = true347348// DirFilePaths is not set via flags but by arguments following the flags for file or directory to process349var DirFilePaths = []string{}350351// ExtensionToLanguage is loaded from the JSON that is in constants.go352var ExtensionToLanguage = map[string][]string{}353354// ShebangLookup loaded from the JSON in constants.go contains shebang lookups355var ShebangLookup = map[string][]string{}356357// FilenameToLanguage similar to ExtensionToLanguage loaded from the JSON in constants.go358var FilenameToLanguage = map[string]string{}359360// LanguageFeatures contains the processed languages from processLanguageFeature361var LanguageFeatures = map[string]LanguageFeature{}362363// LanguageFeaturesMutex is the shared mutex used to control getting and setting of language features364// used rather than sync.Map because it turned out to be marginally faster365var LanguageFeaturesMutex = sync.Mutex{}366367// Start time in milli seconds in case we want the total time368var startTimeMilli = makeTimestampMilli()369370// ConfigureGc needs to be set outside of ProcessConstants because it should only be enabled in command line371// mode https://github.com/boyter/scc/issues/32372func ConfigureGc() {373 gcPercent = debug.SetGCPercent(gcPercent)374}375376// ConfigureLazy is a simple setter used to turn on lazy loading used only by command line377func ConfigureLazy(lazy bool) {378 isLazy = lazy379}380381// ProcessConstants is responsible for setting up the language features based on the JSON file that is stored in constants382// Needs to be called at least once in order for anything to actually happen383func ProcessConstants() {384 startTime := makeTimestampNano()385 for name, value := range languageDatabase {386 for _, ext := range value.Extensions {387 ExtensionToLanguage[ext] = append(ExtensionToLanguage[ext], name)388 }389390 for _, fname := range value.FileNames {391 FilenameToLanguage[fname] = name392 }393394 if 
len(value.SheBangs) != 0 {395 ShebangLookup[name] = value.SheBangs396 }397 }398399 // If we have anything in CountAs set it up now400 if len(CountAs) != 0 {401 setupCountAs()402 }403404 printTraceF("nanoseconds build extension to language: %d", makeTimestampNano()-startTime)405406 // Set up any path pattern count rules, minting new categories backed by a407 // base language. The function clones the base language and builds its408 // features so counting works in both lazy and non-lazy modes.409 if len(CountAsPattern) != 0 || len(CountRules) != 0 {410 setupCountRules()411 }412413 // Configure COCOMO setting414 _, ok := projectType[strings.ToLower(CocomoProjectType)]415 if !ok {416 // let's see if we can turn it into a custom one417 spl := strings.Split(CocomoProjectType, ",")418 val := []float64{}419 if len(spl) == 5 {420 // let's try to convert to float if we can421 for i := 1; i < 5; i++ {422 f, err := strconv.ParseFloat(spl[i], 64)423 if err == nil {424 val = append(val, f)425 }426 }427 }428429 if len(val) == 4 {430 projectType[CocomoProjectType] = val431 } else {432 // if nothing matches fall back to organic433 CocomoProjectType = "organic"434 }435 }436437 // If lazy is set then we want to load in the features as we find them not in one go438 // however otherwise being used as a library so just load them all in439 if !isLazy {440 startTime = makeTimestampMilli()441 for name, value := range languageDatabase {442 processLanguageFeature(name, value)443 }444445 printTraceF("milliseconds build language features: %d", makeTimestampMilli()-startTime)446 } else {447 printTrace("configured to lazy load language features")448 }449450 // Fix for https://github.com/boyter/scc/issues/250451 fixedPath := make([]string, 0, len(PathDenyList))452 for _, path := range PathDenyList {453 fixedPath = append(fixedPath, strings.TrimRight(path, "/"))454 }455 PathDenyList = fixedPath456}457458// Configure and setup any count-as params the use has supplied459func setupCountAs() {460 for 
s := range strings.SplitSeq(CountAs, ",") {461 t := strings.Split(s, ":")462 if len(t) != 2 {463 printError(fmt.Sprintf("ignoring malformed count-as rule %q: expected format <from>:<to>", s))464 continue465 }466467 // There are two cases here.468 // first is they provide the name e.g. "Cargo Lock"469 // second is that the user supplies the extension EG wsdl470 // we should support BOTH cases471 // always remember we only need to validate t[1] as that's the one472 // that tells us where we are trying to map473 target, ok := resolveBaseLanguage(t[1])474 if ok {475 ExtensionToLanguage[strings.ToLower(t[0])] = []string{target}476 printDebugF("set to count extension: %s as language %s", t[0], target)477 continue478 }479480 // The target t[1] matched neither a known language name nor a known481 // extension, so no mapping was registered. Warn rather than silently482 // ignoring the rule, since count-as cannot mint new categories yet.483 printError(fmt.Sprintf("ignoring count-as rule %q: target %q is not a known language or extension", s, t[1]))484 }485}486487// resolveBaseLanguage resolves a user supplied target to a canonical language488// name. It first tries to match a language name (most reliable as names are489// unique) and falls back to matching a known extension. 
Returns the canonical490// language name and whether it was resolved.491func resolveBaseLanguage(target string) (string, bool) {492 // Match by language name which is the most reliable as the name is unique493 for name := range languageDatabase {494 if strings.EqualFold(name, target) {495 return name, true496 }497 }498499 // Fall back to extension match, note this is less reliable as some500 // languages share extensions so we take the first registered language501 langs, ok := ExtensionToLanguage[strings.ToLower(target)]502 if ok && len(langs) != 0 {503 return langs[0], true504 }505506 return "", false507}508509// parseCountAsPattern parses a single --count-as-pattern rule of the form510// [engine:]pattern:name:baselang into a CountRule.511//512// The engine prefix is optional and the pattern is treated as a GLOB BY513// DEFAULT; prefix with re: to opt into a regex (or glob: to be explicit). We514// keep glob and regex as distinct modes rather than inferring, because the same515// string is valid in both engines with different meaning (e.g. 
"foo.rb" matches516// only foo.rb as a glob but also fooXrb as a regex), so guessing would silently517// match the wrong files.518//519// Because regex patterns and paths legitimately contain ':', name and baselang520// are peeled from the right and the pattern is whatever remains in between.521func parseCountAsPattern(s string) (CountRule, error) {522 engine := MatchGlob523 rest := s524525 switch {526 case strings.HasPrefix(rest, "re:"):527 engine = MatchRegex528 rest = rest[len("re:"):]529 case strings.HasPrefix(rest, "glob:"):530 engine = MatchGlob531 rest = rest[len("glob:"):]532 }533534 // baselang = after the last ':', name = between the 2nd-last and last ':'535 lastColon := strings.LastIndex(rest, ":")536 if lastColon == -1 {537 return CountRule{}, fmt.Errorf("expected format [engine:]pattern:name:baselang")538 }539 baseLanguage := rest[lastColon+1:]540541 nameColon := strings.LastIndex(rest[:lastColon], ":")542 if nameColon == -1 {543 return CountRule{}, fmt.Errorf("expected format [engine:]pattern:name:baselang")544 }545 name := rest[nameColon+1 : lastColon]546 pattern := rest[:nameColon]547548 if pattern == "" || name == "" || baseLanguage == "" {549 return CountRule{}, fmt.Errorf("pattern, name and baselang must all be non-empty")550 }551552 return CountRule{Engine: engine, Pattern: pattern, Name: name, BaseLanguage: baseLanguage}, nil553}554555// globToRegex converts a simple glob into an anchored regex. Glob is the556// default --count-as-pattern engine. Only '*' (any run of characters) and '?'557// (single character) are special, everything else is matched literally. 
The558// result is anchored as a full match.559func globToRegex(glob string) string {560 var b strings.Builder561 b.WriteByte('^')562 for _, r := range glob {563 switch r {564 case '*':565 b.WriteString(".*")566 case '?':567 b.WriteByte('.')568 default:569 b.WriteString(regexp.QuoteMeta(string(r)))570 }571 }572 b.WriteByte('$')573 return b.String()574}575576// setupCountRules parses CountAsPattern into CountRules, compiles each rule and577// registers a cloned language under its new name so counting works. Invalid578// rules are reported to stderr and skipped, consistent with --count-as.579func setupCountRules() {580 for _, s := range CountAsPattern {581 rule, err := parseCountAsPattern(s)582 if err != nil {583 printError(fmt.Sprintf("ignoring malformed count-as-pattern rule %q: %s", s, err))584 continue585 }586 CountRules = append(CountRules, rule)587 }588589 for _, rule := range CountRules {590 base, ok := resolveBaseLanguage(rule.BaseLanguage)591 if !ok {592 printError(fmt.Sprintf("ignoring count-as-pattern rule for %q: base language %q is not a known language or extension", rule.Name, rule.BaseLanguage))593 continue594 }595596 source := rule.Pattern597 if rule.Engine == MatchGlob {598 source = globToRegex(rule.Pattern)599 }600601 re, err := regexp.Compile(source)602 if err != nil {603 printError(fmt.Sprintf("ignoring count-as-pattern rule for %q: invalid pattern %q: %s", rule.Name, rule.Pattern, err))604 continue605 }606607 // Clone the base language under the new name so it has counting rules,608 // clearing the matchers so the minted category never participates in609 // normal extension/filename/shebang detection.610 cloned := languageDatabase[base]611 cloned.Extensions = nil612 cloned.FileNames = nil613 cloned.SheBangs = nil614 languageDatabase[rule.Name] = cloned615616 // Populate features now in non-lazy mode, otherwise LoadLanguageFeature617 // will build them on first use since the name is in languageDatabase.618 if !isLazy {619 
processLanguageFeature(rule.Name, cloned)620 }621622 compiledCountRules = append(compiledCountRules, compiledCountRule{re: re, name: rule.Name})623 printDebugF("set to count path matching %q as new language %s based on %s", rule.Pattern, rule.Name, base)624 }625}626627// LoadLanguageFeature will load a single feature as requested given the name628func LoadLanguageFeature(loadName string) {629 if !isLazy {630 return631 }632633 // Check if already loaded and if so return because we don't need to do it again634 LanguageFeaturesMutex.Lock()635 _, ok := LanguageFeatures[loadName]636 LanguageFeaturesMutex.Unlock()637 if ok {638 return639 }640641 var name string642 var value Language643644 for name, value = range languageDatabase {645 if name == loadName {646 break647 }648 }649650 startTime := makeTimestampNano()651 processLanguageFeature(loadName, value)652 printTraceF("nanoseconds to build language %s features: %d", loadName, makeTimestampNano()-startTime)653}654655func processLanguageFeature(name string, value Language) {656 complexityTrie := &Trie{}657 slCommentTrie := &Trie{}658 mlCommentTrie := &Trie{}659 stringTrie := &Trie{}660 tokenTrie := &Trie{}661 keywordBytes := make([][]byte, 0, len(value.Keywords))662 postfixExcludes := make([][]byte, 0, len(value.ComplexityChecksPostfixExcludes))663664 complexityMask := byte(0)665 singleLineCommentMask := byte(0)666 multiLineCommentMask := byte(0)667 stringMask := byte(0)668 processMask := byte(0)669670 for _, v := range value.ComplexityChecks {671 complexityMask |= v[0]672 complexityTrie.Insert(TComplexity, []byte(v))673 if !Complexity {674 tokenTrie.Insert(TComplexity, []byte(v))675 }676 }677 if !Complexity {678 processMask |= complexityMask679 }680681 for _, v := range value.ComplexityChecksPostfix {682 if !Complexity {683 tokenTrie.Insert(TComplexityPostfix, []byte(v))684 processMask |= v[0]685 }686 }687688 for _, v := range value.ComplexityChecksPostfixExcludes {689 postfixExcludes = append(postfixExcludes, 
[]byte(v))690 }691692 for _, v := range value.LineComment {693 singleLineCommentMask |= v[0]694 slCommentTrie.Insert(TSlcomment, []byte(v))695 tokenTrie.Insert(TSlcomment, []byte(v))696 }697 processMask |= singleLineCommentMask698699 for _, v := range value.MultiLine {700 multiLineCommentMask |= v[0][0]701 mlCommentTrie.InsertClose(TMlcomment, []byte(v[0]), []byte(v[1]))702 tokenTrie.InsertClose(TMlcomment, []byte(v[0]), []byte(v[1]))703 }704 processMask |= multiLineCommentMask705706 for _, v := range value.Quotes {707 stringMask |= v.Start[0]708 stringTrie.InsertClose(TString, []byte(v.Start), []byte(v.End))709 tokenTrie.InsertClose(TString, []byte(v.Start), []byte(v.End))710 }711 processMask |= stringMask712713 for _, v := range value.Keywords {714 keywordBytes = append(keywordBytes, []byte(v))715 }716717 LanguageFeaturesMutex.Lock()718 LanguageFeatures[name] = LanguageFeature{719 Complexity: complexityTrie,720 MultiLineComments: mlCommentTrie,721 MultiLine: value.MultiLine,722 SingleLineComments: slCommentTrie,723 LineComment: value.LineComment,724 Strings: stringTrie,725 Tokens: tokenTrie,726 Nested: value.NestedMultiLine,727 PostfixExcludes: postfixExcludes,728 ComplexityCheckMask: complexityMask,729 MultiLineCommentMask: multiLineCommentMask,730 SingleLineCommentMask: singleLineCommentMask,731 StringCheckMask: stringMask,732 ProcessMask: processMask,733 Keywords: value.Keywords,734 KeywordBytes: keywordBytes,735 Quotes: value.Quotes,736 }737 LanguageFeaturesMutex.Unlock()738}739740func processFlags() {741 // If wide/more mode is enabled we want the complexity calculation742 // to happen regardless as that is the only purpose of the flag743 if More && Complexity {744 Complexity = false745 }746747 // If ignore minified/generated is on ensure we turn on the code to calculate that748 if IgnoreMinifiedGenerate {749 MinifiedGenerated = true750 IgnoreMinified = true751 IgnoreGenerated = true752 }753754 if MinifiedGenerated {755 Minified = true756 Generated = true757 
}758759 if IgnoreMinified {760 Minified = true761 }762763 if IgnoreGenerated {764 Generated = true765 }766767 if Dryness {768 UlocMode = true769 }770771 printDebugF("Path Deny List: %v", PathDenyList)772 printDebugF("Sort By: %s", SortBy)773 printDebugF("White List: %v", AllowListExtensions)774 printDebugF("Files Output: %t", Files)775 printDebugF("Verbose: %t", Verbose)776 printDebugF("Duplicates Detection: %t", Duplicates)777 printDebugF("Complexity Calculation: %t", !Complexity)778 printDebugF("Wide: %t", More)779 // If cost-comparison is enabled, turn on both COCOMO and LOCOMO780 if CostComparison {781 Cocomo = false782 Locomo = true783 }784785 // LOCOMO needs complexity data to produce accurate estimates.786 // If complexity was disabled via --no-complexity, force it back on.787 if Locomo && Complexity {788 Complexity = false789 }790791 printDebugF("Average Wage: %d", AverageWage)792 printDebugF("Cocomo: %t", !Cocomo)793 printDebugF("Locomo: %t", Locomo)794 printDebugF("Minified/Generated Detection: %t/%t", Minified, Generated)795 printDebugF("Ignore Minified/Generated: %t/%t", IgnoreMinified, IgnoreGenerated)796 printDebugF("IncludeSymLinks: %t", IncludeSymLinks)797 printDebugF("Uloc: %t", UlocMode)798 printDebugF("Dryness: %t", Dryness)799}800801// LanguageDatabase provides access to the internal language database802// useful for consuming applications wanting to consume and use803func LanguageDatabase() map[string]Language {804 return languageDatabase805}806807func PrintLanguages(dst io.Writer) {808 names := make([]string, 0, len(languageDatabase))809 for key := range languageDatabase {810 names = append(names, key)811 }812813 slices.SortFunc(names, func(a, b string) int {814 return strings.Compare(strings.ToLower(a), strings.ToLower(b))815 })816817 for _, name := range names {818 _, _ = fmt.Fprintf(dst, "%s (%s)\n", name, strings.Join(append(languageDatabase[name].Extensions, languageDatabase[name].FileNames...), ","))819 }820}821822// global variables to 
deal with ULOC calculations823var ulocMutex = sync.Mutex{}824var ulocGlobalCount = map[string]struct{}{}825var ulocLanguageCount = map[string]map[string]struct{}{}826827// Process is the main entry point of the command line it sets everything up and starts running828func Process() {829 if Languages {830 PrintLanguages(os.Stdout)831 return832 }833834 ProcessConstants()835 processFlags()836 cleanVisitedPaths()837838 // Clean up any invalid arguments before setting everything up839 if len(DirFilePaths) == 0 {840 DirFilePaths = append(DirFilePaths, ".")841 }842843 // --report mode short-circuits the normal format dispatch and writes a844 // self-contained HTML report. Mutually exclusive with --format / -f: if845 // the user passed both, warn on stderr and let --report win.846 if ReportOut != "" {847 if Format != "" && Format != "tabular" {848 fmt.Fprintf(os.Stderr, "warning: --report overrides --format=%s\n", Format)849 }850 parseReportSkip(ReportSkip)851 if len(DirFilePaths) > 1 {852 fmt.Fprintf(os.Stderr, "warning: --report only analyses the first positional path (%s); other paths ignored\n", DirFilePaths[0])853 }854 if err := runReport(DirFilePaths); err != nil {855 fmt.Println(err)856 os.Exit(1)857 }858 return859 }860861 if Hotspots && (ByAuthor || Timeline) {862 fmt.Println("--hotspots is mutually exclusive with --by-author / --timeline; pick one report")863 os.Exit(1)864 }865866 if Hotspots || ByAuthor || Timeline {867 if err := validateHistoryFlags(os.Stderr); err != nil {868 fmt.Println(err)869 os.Exit(1)870 }871 }872873 if Hotspots {874 if err := runHotspotsReport(DirFilePaths[0]); err != nil {875 fmt.Println(err)876 os.Exit(1)877 }878 return879 }880881 if ByAuthor && Timeline {882 if err := runAuthorTimelineReport(DirFilePaths[0]); err != nil {883 fmt.Println(err)884 os.Exit(1)885 }886 return887 }888889 if ByAuthor {890 if err := runAuthorsReport(DirFilePaths[0]); err != nil {891 fmt.Println(err)892 os.Exit(1)893 }894 return895 }896897 if Timeline {898 if err 
:= runLanguagesTimelineReport(DirFilePaths[0]); err != nil {899 fmt.Println(err)900 os.Exit(1)901 }902 return903 }904905 filePaths := []string{}906 dirPaths := []string{}907908 // Check if the paths or files added exist and exit if not909 for _, f := range DirFilePaths {910 fpath := filepath.Clean(f)911912 s, err := os.Stat(fpath)913 if err != nil {914 fmt.Println("file or directory could not be read: " + fpath)915 os.Exit(1)916 }917918 if s.IsDir() {919 dirPaths = append(dirPaths, fpath)920 } else {921 filePaths = append(filePaths, fpath)922 }923 }924925 SortBy = strings.ToLower(SortBy)926 ctx := processorContext{remap: newRemapConfig(RemapAll, RemapUnknown)}927928 printDebugF("NumCPU: %d", runtime.NumCPU())929 printDebugF("SortBy: %s", SortBy)930 printDebugF("PathDenyList: %v", PathDenyList)931932 potentialFilesQueue := make(chan *gocodewalker.File, FileListQueueSize) // files that pass the .gitignore checks933 fileListQueue := make(chan *FileJob, FileListQueueSize) // Files ready to be read from disk934 fileSummaryJobQueue := make(chan *FileJob, FileSummaryJobQueueSize) // Files ready to be summarised935936 fileWalker := gocodewalker.NewParallelFileWalker(dirPaths, potentialFilesQueue)937 fileWalker.SetErrorHandler(func(e error) bool {938 printError(e.Error())939 return true940 })941 fileWalker.IgnoreGitIgnore = GitIgnore942 fileWalker.IgnoreIgnoreFile = Ignore943 fileWalker.IgnoreGitModules = GitModuleIgnore944 fileWalker.IncludeHidden = true945 fileWalker.ExcludeDirectory = PathDenyList946 fileWalker.SetConcurrency(DirectoryWalkerJobWorkers)947948 if !SccIgnore {949 fileWalker.CustomIgnore = []string{".sccignore"}950 }951952 var excludePathRegexes []*regexp.Regexp953 for _, exclude := range Exclude {954 regexpResult, err := regexp.Compile(exclude)955 if err == nil {956 fileWalker.ExcludeFilenameRegex = append(fileWalker.ExcludeFilenameRegex, regexpResult)957 fileWalker.ExcludeDirectoryRegex = append(fileWalker.ExcludeDirectoryRegex, regexpResult)958 
excludePathRegexes = append(excludePathRegexes, regexpResult)959 } else {960 printError(err.Error())961 }962 }963964 go func() {965 err := fileWalker.Start()966 if err != nil {967 printError(err.Error())968 }969 }()970971 go func() {972 for _, f := range filePaths {973 fileInfo, err := os.Lstat(f)974 if err != nil {975 continue976 }977978 fileJob := newFileJob(f, f, fileInfo)979 if fileJob != nil {980 fileListQueue <- fileJob981 }982 }983984 for fi := range potentialFilesQueue {985 shouldExclude := false986 for _, re := range excludePathRegexes {987 if re.MatchString(fi.Location) {988 shouldExclude = true989 break990 }991 }992 if shouldExclude {993 continue994 }995996 fileInfo, err := os.Lstat(fi.Location)997 if err != nil {998 continue999 }10001001 if !fileInfo.IsDir() {1002 fileJob := newFileJob(fi.Location, fi.Filename, fileInfo)1003 if fileJob != nil {1004 fileListQueue <- fileJob1005 }1006 }1007 }1008 close(fileListQueue)1009 }()10101011 go ctx.fileProcessorWorker(fileListQueue, fileSummaryJobQueue)10121013 result := fileSummarize(fileSummaryJobQueue)1014 if FileOutput == "" {1015 fmt.Print(result)1016 } else {1017 _ = os.WriteFile(FileOutput, []byte(result), 0644)1018 fmt.Println("results written to " + FileOutput)1019 }1020}
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.