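The map variable is declared with an empty composite literal (`map[string][]string{}`), which already yields an initialized, non-nil map, so writing to it does not panic. An explicit make() is only needed when a map variable is declared without an initializer (e.g. `var m map[string][]string`), because writing to a nil map panics.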
// ExtensionToLanguage is initialized here with an empty map literal, so it is
// non-nil and can be written to without calling make() first.
var ExtensionToLanguage = map[string][]string{}
1// SPDX-License-Identifier: MIT23package processor45import (6 "fmt"7 "io"8 "os"9 "path/filepath"10 "regexp"11 "runtime"12 "runtime/debug"13 "slices"14 "strconv"15 "strings"16 "sync"1718 "github.com/boyter/gocodewalker"19)2021// Version indicates the version of the application22var Version = "3.7.0"2324// Flags set via the CLI which control how the output is displayed2526// Files indicates if there should be file output or not when formatting27var Files = false2829// Languages indicates if the command line should print out the supported languages30var Languages = false3132// Verbose enables verbose logging output33var Verbose = false3435// Debug enables debug logging output36var Debug = false3738// Trace enables trace logging output which is extremely verbose39var Trace = false4041// Duplicates enables duplicate file detection42var Duplicates = false4344// MinifiedGenerated enables minified/generated file detection45var MinifiedGenerated = false4647// IgnoreMinifiedGenerate printing counts for minified/generated files48var IgnoreMinifiedGenerate = false4950// MinifiedGeneratedLineByteLength number of bytes per average line to determine file is minified/generated51var MinifiedGeneratedLineByteLength = 2555253// Minified enables minified file detection54var Minified = false5556// IgnoreMinified ignore printing counts for minified files57var IgnoreMinified = false5859// Generated enables generated file detection60var Generated = false6162// GeneratedMarkers defines head markers for generated file detection63var GeneratedMarkers []string6465// IgnoreGenerated ignore printing counts for generated files66var IgnoreGenerated = false6768// Complexity toggles complexity calculation69var Complexity = false7071// More enables wider output with more information in formatter72var More = false7374// Cocomo toggles the COCOMO calculation75var Cocomo = false7677// SLOCCountFormat prints a more SLOCCount like COCOMO calculation78var SLOCCountFormat = false7980// CocomoProjectType 
allows the flipping between project types which impacts the calculation81var CocomoProjectType = "organic"8283// Size toggles the Size calculation84var Size = false8586// Draw horizontal borders between sections.87var HBorder = false8889// SizeUnit determines what size calculation is used for megabytes90var SizeUnit = "si"9192// Ci indicates if running inside a CI so to disable box drawing characters93var Ci = false9495// GitIgnore disables .gitignore checks96var GitIgnore = false9798// GitModuleIgnore disables .gitmodules checks99var GitModuleIgnore = false100101// Ignore disables ignore file checks102var Ignore = false103104// SccIgnore disables sccignore file checks105var SccIgnore = false106107// CountIgnore should we count ignore files?108var CountIgnore = false109110// DisableCheckBinary toggles checking for binary files using NUL bytes111var DisableCheckBinary = false112113// UlocMode toggles checking for binary files using NUL bytes114var UlocMode = false115116// Percent toggles checking for binary files using NUL bytes117var Percent = false118119// MaxMean sets the calculation of the max and mean line length120var MaxMean = false121122// Dryness toggles checking for binary files using NUL bytes123var Dryness = false124125// SortBy sets which column output in formatter should be sorted by126var SortBy = ""127128// Exclude is a regular expression which is used to exclude files from being processed129var Exclude = []string{}130131// CountAs is a rule for mapping known or new extensions to other rules132var CountAs = ""133134// Format sets the output format of the formatter135var Format = ""136137// FormatMulti is a rule for defining multiple output formats138var FormatMulti = ""139140// SQLProject is used to store the name for the SQL insert formats but is optional141var SQLProject = ""142143// RemapUnknown allows remapping of unknown files with a string to search the content for144var RemapUnknown = ""145146// RemapAll allows remapping of all files with a 
string to search the content for147var RemapAll = ""148149type remapRule struct {150 pattern []byte151 language string152}153154type remapConfig struct {155 all []remapRule156 unknown []remapRule157}158159type processorContext struct {160 remap remapConfig161}162163func parseRemapRules(value string) []remapRule {164 rules := []remapRule{}165166 for s := range strings.SplitSeq(value, ",") {167 t := strings.Split(s, ":")168 if len(t) == 2 {169 rules = append(rules, remapRule{170 pattern: []byte(t[0]),171 language: t[1],172 })173 }174 }175176 return rules177}178179func newRemapConfig(remapAll string, remapUnknown string) remapConfig {180 return remapConfig{181 all: parseRemapRules(remapAll),182 unknown: parseRemapRules(remapUnknown),183 }184}185186// CurrencySymbol allows setting the currency symbol for cocomo project cost estimation187var CurrencySymbol = ""188189// FileOutput sets the file that output should be written to190var FileOutput = ""191192// PathDenyList sets the paths that should be skipped193var PathDenyList = []string{}194195// FileListQueueSize is the queue of files found and ready to be read into memory196var FileListQueueSize = runtime.NumCPU()197198// FileProcessJobWorkers is the number of workers that process the file collecting stats199var FileProcessJobWorkers = runtime.NumCPU() * 4200201// FileSummaryJobQueueSize is the queue used to hold processed file statistics before formatting202var FileSummaryJobQueueSize = runtime.NumCPU()203204// DirectoryWalkerJobWorkers is the number of workers which will walk the directory tree205var DirectoryWalkerJobWorkers = 8206207// AllowListExtensions is a list of extensions which are allowed to be processed208var AllowListExtensions = []string{}209210// ExcludeListExtensions is a list of extensions which should be ignored211var ExcludeListExtensions = []string{}212213// ExcludeFilename is a list of filenames which should be ignored214var ExcludeFilename = []string{}215216// AverageWage is the average wage in 
dollars used for the COCOMO cost estimate217var AverageWage int64 = 56286218219// Overhead is the overhead multiplier for corporate overhead (facilities, equipment, accounting, etc.)220var Overhead float64 = 2.4221222// EAF is the effort adjustment factor derived from the cost drivers, i.e. 1.0 if rated nominal223var EAF float64 = 1.0224225// Locomo toggles the LOCOMO (LLM Output COst MOdel) calculation226var Locomo = false227228// CostComparison enables both COCOMO and LOCOMO output for side-by-side comparison229var CostComparison = false230231// LocomoPresetName is the LLM model preset for pricing and throughput defaults232var LocomoPresetName = "medium"233234// LocomoInputPrice is the cost per 1M input tokens (overrides preset)235var LocomoInputPrice float64236var LocomoInputPriceSet = false237238// LocomoOutputPrice is the cost per 1M output tokens (overrides preset)239var LocomoOutputPrice float64240var LocomoOutputPriceSet = false241242// LocomoTPS is the output tokens per second (overrides preset)243var LocomoTPS float64244var LocomoTPSSet = false245246// LocomoReviewMinutesPerLine is the human review time per line of code in minutes247var LocomoReviewMinutesPerLine float64 = 0.01248249// LocomoConfig is the power-user config string "tokensPerLine,baseInputPerLine,complexityWeight,iterations,iterationWeight"250var LocomoConfig = ""251252// LocomoTokensPerLine is the average number of output tokens per line of code253var LocomoTokensPerLine float64 = 10254255// LocomoBaseInputPerLine is the base number of input tokens per output line256var LocomoBaseInputPerLine float64 = 20257258// LocomoComplexityWeight is the scaling weight applied to sqrt(complexity density) for input tokens259var LocomoComplexityWeight float64 = 5260261// LocomoIterations is the base number of iteration/retry attempts262var LocomoIterations float64 = 1.5263264// LocomoIterationWeight is the scaling weight for complexity-driven retries265var LocomoIterationWeight float64 = 2266267// 
LocomoCyclesOverride is the user-supplied iteration factor override (--locomo-cycles)268var LocomoCyclesOverride float64269270// LocomoCyclesSet indicates whether --locomo-cycles was explicitly set271var LocomoCyclesSet = false272273// GcFileCount is the number of files to process before turning the GC back on274var GcFileCount = 10000275var gcPercent = -1276var isLazy = false277278// NoLarge if set true will ignore files over a certain number of lines or bytes279var NoLarge = false280281// IncludeSymLinks if set true will count symlink files282var IncludeSymLinks = false283284// LargeLineCount number of lines before being counted as a large file based on https://github.com/pinpt/ripsrc/blob/master/ripsrc/fileinfo/fileinfo.go#L44285var LargeLineCount int64 = 40000286287// LargeByteCount number of bytes before being counted as a large file based on https://github.com/pinpt/ripsrc/blob/master/ripsrc/fileinfo/fileinfo.go#L44288var LargeByteCount int64 = 1000000289290// DirFilePaths is not set via flags but by arguments following the flags for file or directory to process291var DirFilePaths = []string{}292293// ExtensionToLanguage is loaded from the JSON that is in constants.go294var ExtensionToLanguage = map[string][]string{}295296// ShebangLookup loaded from the JSON in constants.go contains shebang lookups297var ShebangLookup = map[string][]string{}298299// FilenameToLanguage similar to ExtensionToLanguage loaded from the JSON in constants.go300var FilenameToLanguage = map[string]string{}301302// LanguageFeatures contains the processed languages from processLanguageFeature303var LanguageFeatures = map[string]LanguageFeature{}304305// LanguageFeaturesMutex is the shared mutex used to control getting and setting of language features306// used rather than sync.Map because it turned out to be marginally faster307var LanguageFeaturesMutex = sync.Mutex{}308309// Start time in milli seconds in case we want the total time310var startTimeMilli = makeTimestampMilli()311312// 
ConfigureGc needs to be set outside of ProcessConstants because it should only be enabled in command line313// mode https://github.com/boyter/scc/issues/32314func ConfigureGc() {315 gcPercent = debug.SetGCPercent(gcPercent)316}317318// ConfigureLazy is a simple setter used to turn on lazy loading used only by command line319func ConfigureLazy(lazy bool) {320 isLazy = lazy321}322323// ProcessConstants is responsible for setting up the language features based on the JSON file that is stored in constants324// Needs to be called at least once in order for anything to actually happen325func ProcessConstants() {326 startTime := makeTimestampNano()327 for name, value := range languageDatabase {328 for _, ext := range value.Extensions {329 ExtensionToLanguage[ext] = append(ExtensionToLanguage[ext], name)330 }331332 for _, fname := range value.FileNames {333 FilenameToLanguage[fname] = name334 }335336 if len(value.SheBangs) != 0 {337 ShebangLookup[name] = value.SheBangs338 }339 }340341 // If we have anything in CountAs set it up now342 if len(CountAs) != 0 {343 setupCountAs()344 }345346 printTraceF("nanoseconds build extension to language: %d", makeTimestampNano()-startTime)347348 // Configure COCOMO setting349 _, ok := projectType[strings.ToLower(CocomoProjectType)]350 if !ok {351 // let's see if we can turn it into a custom one352 spl := strings.Split(CocomoProjectType, ",")353 val := []float64{}354 if len(spl) == 5 {355 // let's try to convert to float if we can356 for i := 1; i < 5; i++ {357 f, err := strconv.ParseFloat(spl[i], 64)358 if err == nil {359 val = append(val, f)360 }361 }362 }363364 if len(val) == 4 {365 projectType[CocomoProjectType] = val366 } else {367 // if nothing matches fall back to organic368 CocomoProjectType = "organic"369 }370 }371372 // If lazy is set then we want to load in the features as we find them not in one go373 // however otherwise being used as a library so just load them all in374 if !isLazy {375 startTime = makeTimestampMilli()376 for 
name, value := range languageDatabase {377 processLanguageFeature(name, value)378 }379380 printTraceF("milliseconds build language features: %d", makeTimestampMilli()-startTime)381 } else {382 printTrace("configured to lazy load language features")383 }384385 // Fix for https://github.com/boyter/scc/issues/250386 fixedPath := make([]string, 0, len(PathDenyList))387 for _, path := range PathDenyList {388 fixedPath = append(fixedPath, strings.TrimRight(path, "/"))389 }390 PathDenyList = fixedPath391}392393// Configure and setup any count-as params the use has supplied394func setupCountAs() {395 for s := range strings.SplitSeq(CountAs, ",") {396 t := strings.Split(s, ":")397 if len(t) == 2 {398399 identified := false400401 // There are two cases here.402 // first is they provide the name e.g. "Cargo Lock"403 // second is that the user supplies the extension EG wsdl404 // we should support BOTH cases405 // always remember we only need to validate t[1] as that's the one406 // that tells us where we are trying to map407408 // See if we can identify based on language name which is the most409 // reliable as the name should be unique410 for name := range languageDatabase {411 if strings.EqualFold(name, t[1]) {412 ExtensionToLanguage[strings.ToLower(t[0])] = []string{name}413 identified = true414 printDebugF("set to count extension: %s as language %s by language", t[0], name)415 }416 }417418 // If the above did not work, its a matter of extension match419 // note that this is less reliable as some languages share extensions420 if !identified {421 target, ok := ExtensionToLanguage[strings.ToLower(t[1])]422423 if ok {424 ExtensionToLanguage[strings.ToLower(t[0])] = target425 printDebugF("set to count extension: %s as language %s by extension", t[0], target)426 }427 }428 }429 }430}431432// LoadLanguageFeature will load a single feature as requested given the name433func LoadLanguageFeature(loadName string) {434 if !isLazy {435 return436 }437438 // Check if already loaded and 
if so return because we don't need to do it again439 LanguageFeaturesMutex.Lock()440 _, ok := LanguageFeatures[loadName]441 LanguageFeaturesMutex.Unlock()442 if ok {443 return444 }445446 var name string447 var value Language448449 for name, value = range languageDatabase {450 if name == loadName {451 break452 }453 }454455 startTime := makeTimestampNano()456 processLanguageFeature(loadName, value)457 printTraceF("nanoseconds to build language %s features: %d", loadName, makeTimestampNano()-startTime)458}459460func processLanguageFeature(name string, value Language) {461 complexityTrie := &Trie{}462 slCommentTrie := &Trie{}463 mlCommentTrie := &Trie{}464 stringTrie := &Trie{}465 tokenTrie := &Trie{}466 keywordBytes := make([][]byte, 0, len(value.Keywords))467 postfixExcludes := make([][]byte, 0, len(value.ComplexityChecksPostfixExcludes))468469 complexityMask := byte(0)470 singleLineCommentMask := byte(0)471 multiLineCommentMask := byte(0)472 stringMask := byte(0)473 processMask := byte(0)474475 for _, v := range value.ComplexityChecks {476 complexityMask |= v[0]477 complexityTrie.Insert(TComplexity, []byte(v))478 if !Complexity {479 tokenTrie.Insert(TComplexity, []byte(v))480 }481 }482 if !Complexity {483 processMask |= complexityMask484 }485486 for _, v := range value.ComplexityChecksPostfix {487 if !Complexity {488 tokenTrie.Insert(TComplexityPostfix, []byte(v))489 processMask |= v[0]490 }491 }492493 for _, v := range value.ComplexityChecksPostfixExcludes {494 postfixExcludes = append(postfixExcludes, []byte(v))495 }496497 for _, v := range value.LineComment {498 singleLineCommentMask |= v[0]499 slCommentTrie.Insert(TSlcomment, []byte(v))500 tokenTrie.Insert(TSlcomment, []byte(v))501 }502 processMask |= singleLineCommentMask503504 for _, v := range value.MultiLine {505 multiLineCommentMask |= v[0][0]506 mlCommentTrie.InsertClose(TMlcomment, []byte(v[0]), []byte(v[1]))507 tokenTrie.InsertClose(TMlcomment, []byte(v[0]), []byte(v[1]))508 }509 processMask |= 
multiLineCommentMask510511 for _, v := range value.Quotes {512 stringMask |= v.Start[0]513 stringTrie.InsertClose(TString, []byte(v.Start), []byte(v.End))514 tokenTrie.InsertClose(TString, []byte(v.Start), []byte(v.End))515 }516 processMask |= stringMask517518 for _, v := range value.Keywords {519 keywordBytes = append(keywordBytes, []byte(v))520 }521522 LanguageFeaturesMutex.Lock()523 LanguageFeatures[name] = LanguageFeature{524 Complexity: complexityTrie,525 MultiLineComments: mlCommentTrie,526 MultiLine: value.MultiLine,527 SingleLineComments: slCommentTrie,528 LineComment: value.LineComment,529 Strings: stringTrie,530 Tokens: tokenTrie,531 Nested: value.NestedMultiLine,532 PostfixExcludes: postfixExcludes,533 ComplexityCheckMask: complexityMask,534 MultiLineCommentMask: multiLineCommentMask,535 SingleLineCommentMask: singleLineCommentMask,536 StringCheckMask: stringMask,537 ProcessMask: processMask,538 Keywords: value.Keywords,539 KeywordBytes: keywordBytes,540 Quotes: value.Quotes,541 }542 LanguageFeaturesMutex.Unlock()543}544545func processFlags() {546 // If wide/more mode is enabled we want the complexity calculation547 // to happen regardless as that is the only purpose of the flag548 if More && Complexity {549 Complexity = false550 }551552 // If ignore minified/generated is on ensure we turn on the code to calculate that553 if IgnoreMinifiedGenerate {554 MinifiedGenerated = true555 IgnoreMinified = true556 IgnoreGenerated = true557 }558559 if MinifiedGenerated {560 Minified = true561 Generated = true562 }563564 if IgnoreMinified {565 Minified = true566 }567568 if IgnoreGenerated {569 Generated = true570 }571572 if Dryness {573 UlocMode = true574 }575576 printDebugF("Path Deny List: %v", PathDenyList)577 printDebugF("Sort By: %s", SortBy)578 printDebugF("White List: %v", AllowListExtensions)579 printDebugF("Files Output: %t", Files)580 printDebugF("Verbose: %t", Verbose)581 printDebugF("Duplicates Detection: %t", Duplicates)582 printDebugF("Complexity 
Calculation: %t", !Complexity)583 printDebugF("Wide: %t", More)584 // If cost-comparison is enabled, turn on both COCOMO and LOCOMO585 if CostComparison {586 Cocomo = false587 Locomo = true588 }589590 // LOCOMO needs complexity data to produce accurate estimates.591 // If complexity was disabled via --no-complexity, force it back on.592 if Locomo && Complexity {593 Complexity = false594 }595596 printDebugF("Average Wage: %d", AverageWage)597 printDebugF("Cocomo: %t", !Cocomo)598 printDebugF("Locomo: %t", Locomo)599 printDebugF("Minified/Generated Detection: %t/%t", Minified, Generated)600 printDebugF("Ignore Minified/Generated: %t/%t", IgnoreMinified, IgnoreGenerated)601 printDebugF("IncludeSymLinks: %t", IncludeSymLinks)602 printDebugF("Uloc: %t", UlocMode)603 printDebugF("Dryness: %t", Dryness)604}605606// LanguageDatabase provides access to the internal language database607// useful for consuming applications wanting to consume and use608func LanguageDatabase() map[string]Language {609 return languageDatabase610}611612func PrintLanguages(dst io.Writer) {613 names := make([]string, 0, len(languageDatabase))614 for key := range languageDatabase {615 names = append(names, key)616 }617618 slices.SortFunc(names, func(a, b string) int {619 return strings.Compare(strings.ToLower(a), strings.ToLower(b))620 })621622 for _, name := range names {623 _, _ = fmt.Fprintf(dst, "%s (%s)\n", name, strings.Join(append(languageDatabase[name].Extensions, languageDatabase[name].FileNames...), ","))624 }625}626627// global variables to deal with ULOC calculations628var ulocMutex = sync.Mutex{}629var ulocGlobalCount = map[string]struct{}{}630var ulocLanguageCount = map[string]map[string]struct{}{}631632// Process is the main entry point of the command line it sets everything up and starts running633func Process() {634 if Languages {635 PrintLanguages(os.Stdout)636 return637 }638639 ProcessConstants()640 processFlags()641 cleanVisitedPaths()642643 // Clean up any invalid arguments 
before setting everything up644 if len(DirFilePaths) == 0 {645 DirFilePaths = append(DirFilePaths, ".")646 }647648 filePaths := []string{}649 dirPaths := []string{}650651 // Check if the paths or files added exist and exit if not652 for _, f := range DirFilePaths {653 fpath := filepath.Clean(f)654655 s, err := os.Stat(fpath)656 if err != nil {657 fmt.Println("file or directory could not be read: " + fpath)658 os.Exit(1)659 }660661 if s.IsDir() {662 dirPaths = append(dirPaths, fpath)663 } else {664 filePaths = append(filePaths, fpath)665 }666 }667668 SortBy = strings.ToLower(SortBy)669 ctx := processorContext{remap: newRemapConfig(RemapAll, RemapUnknown)}670671 printDebugF("NumCPU: %d", runtime.NumCPU())672 printDebugF("SortBy: %s", SortBy)673 printDebugF("PathDenyList: %v", PathDenyList)674675 potentialFilesQueue := make(chan *gocodewalker.File, FileListQueueSize) // files that pass the .gitignore checks676 fileListQueue := make(chan *FileJob, FileListQueueSize) // Files ready to be read from disk677 fileSummaryJobQueue := make(chan *FileJob, FileSummaryJobQueueSize) // Files ready to be summarised678679 fileWalker := gocodewalker.NewParallelFileWalker(dirPaths, potentialFilesQueue)680 fileWalker.SetErrorHandler(func(e error) bool {681 printError(e.Error())682 return true683 })684 fileWalker.IgnoreGitIgnore = GitIgnore685 fileWalker.IgnoreIgnoreFile = Ignore686 fileWalker.IgnoreGitModules = GitModuleIgnore687 fileWalker.IncludeHidden = true688 fileWalker.ExcludeDirectory = PathDenyList689 fileWalker.SetConcurrency(DirectoryWalkerJobWorkers)690691 if !SccIgnore {692 fileWalker.CustomIgnore = []string{".sccignore"}693 }694695 var excludePathRegexes []*regexp.Regexp696 for _, exclude := range Exclude {697 regexpResult, err := regexp.Compile(exclude)698 if err == nil {699 fileWalker.ExcludeFilenameRegex = append(fileWalker.ExcludeFilenameRegex, regexpResult)700 fileWalker.ExcludeDirectoryRegex = append(fileWalker.ExcludeDirectoryRegex, regexpResult)701 
excludePathRegexes = append(excludePathRegexes, regexpResult)702 } else {703 printError(err.Error())704 }705 }706707 go func() {708 err := fileWalker.Start()709 if err != nil {710 printError(err.Error())711 }712 }()713714 go func() {715 for _, f := range filePaths {716 fileInfo, err := os.Lstat(f)717 if err != nil {718 continue719 }720721 fileJob := newFileJob(f, f, fileInfo)722 if fileJob != nil {723 fileListQueue <- fileJob724 }725 }726727 for fi := range potentialFilesQueue {728 shouldExclude := false729 for _, re := range excludePathRegexes {730 if re.MatchString(fi.Location) {731 shouldExclude = true732 break733 }734 }735 if shouldExclude {736 continue737 }738739 fileInfo, err := os.Lstat(fi.Location)740 if err != nil {741 continue742 }743744 if !fileInfo.IsDir() {745 fileJob := newFileJob(fi.Location, fi.Filename, fileInfo)746 if fileJob != nil {747 fileListQueue <- fileJob748 }749 }750 }751 close(fileListQueue)752 }()753754 go ctx.fileProcessorWorker(fileListQueue, fileSummaryJobQueue)755756 result := fileSummarize(fileSummaryJobQueue)757 if FileOutput == "" {758 fmt.Print(result)759 } else {760 _ = os.WriteFile(FileOutput, []byte(result), 0644)761 fmt.Println("results written to " + FileOutput)762 }763}
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.