processor/file.go GO 187 lines View on github.com → Search inside
1// SPDX-License-Identifier: MIT23package processor45import (6	"os"7	"path/filepath"8	"strings"9	"sync"10)1112// Used as quick lookup for files with the same name to avoid some processing13// needs to be sync.Map as it potentially could be called by many GoRoutines14var extensionCache sync.Map1516// Added as a way to track files per run.17var visitedPaths sync.Map1819// A custom version of extracting extensions for a file20// which also has a case-insensitive cache in order to save21// some needless processing22func getExtension(name string) string {23	name = strings.ToLower(name)24	extension, ok := extensionCache.Load(name)2526	if ok {27		return extension.(string)28	}2930	ext := filepath.Ext(name)3132	if ext == "" || strings.LastIndex(name, ".") == 0 {33		extension = name34	} else {35		// Handling multiple dots or multiple extensions only needs to delete the last extension36		// and then call filepath.Ext.37		// If there are multiple extensions, it is the value of subExt,38		// otherwise subExt is an empty string.39		subExt := filepath.Ext(strings.TrimSuffix(name, ext))40		extension = strings.TrimPrefix(subExt+ext, ".")41	}4243	extensionCache.Store(name, extension)44	return extension.(string)45}4647func cleanVisitedPaths() {48	visitedPaths.Clear()49}5051func newFileJob(path, name string, fileInfo os.FileInfo) *FileJob {52	if NoLarge {53		if fileInfo.Size() >= LargeByteCount {54			printWarnF("skipping large file due to byte size: %s", path)55			return nil56		}57	}5859	var symPath = ""60	// Check if the file is a symlink and if we want to count those then work out its path and rejig61	// everything so we can count the real file to ensure the counts are correct62	if fileInfo.Mode()&os.ModeSymlink == os.ModeSymlink {63		if !IncludeSymLinks {64			printWarnF("skipping symlink file: %s", name)65			return nil66		}6768		var err error69		symPath, err = filepath.EvalSymlinks(path)70		if err != nil {71			printError(err.Error())72			return nil73		}74		fileInfo, err = 
os.Lstat(symPath)75		if err != nil {76			printError(err.Error())77			return nil78		}79	}8081	// Skip non-regular files. They are unlikely to be code and may hang if we82	// try and read them.83	if !fileInfo.Mode().IsRegular() {84		printWarnF("skipping non-regular file: %s", path)85		return nil86	}8788	// This determines the real path89	realPath := path90	if symPath != "" {91		realPath = symPath92	}9394	// Prevent duplicate processing and loops95	if _, exists := visitedPaths.Load(realPath); exists {96		printWarnF("skipping already processed file: %s", realPath)97		return nil98	}99	visitedPaths.Store(realPath, true)100101	language, extension := DetectLanguage(name)102103	// Path pattern count rules can relabel a file to a new minted category and104	// can also rescue files that normal detection would otherwise skip. First105	// matching rule wins, evaluated against the full path as supplied.106	if len(compiledCountRules) != 0 {107		for _, r := range compiledCountRules {108			if r.re.MatchString(path) {109				language = []string{r.name}110				LoadLanguageFeature(r.name)111				break112			}113		}114	}115116	if len(language) != 0 {117		// check if extensions in the allow list, which should limit to just those extensions118		if len(AllowListExtensions) != 0 {119			ok := false120			for _, x := range AllowListExtensions {121				if x == extension {122					ok = true123				}124			}125126			if !ok {127				printWarnF("skipping file as not in allow list: %s", name)128				return nil129			}130		}131132		// check if we should exclude this type133		if len(ExcludeListExtensions) != 0 {134			ok := true135			for _, x := range ExcludeListExtensions {136				if x == extension {137					ok = false138				}139			}140141			if !ok {142				printWarnF("skipping file as in exclude list: %s", name)143				return nil144			}145		}146147		if len(ExcludeFilename) != 0 {148			ok := true149			for _, x := range ExcludeFilename {150				if strings.Contains(name, x) {151					ok = false152				}153			}154155			
if !ok {156				printWarnF("skipping file as in exclude file list: %s", name)157				return nil158			}159		}160161		for _, l := range language {162			LoadLanguageFeature(l)163		}164165		if !CountIgnore {166			for _, l := range language {167				if l == "ignore" || l == "gitignore" {168					return nil169				}170			}171		}172173		return &FileJob{174			Location:          path,175			Symlocation:       symPath,176			Filename:          name,177			Extension:         extension,178			PossibleLanguages: language,179			Bytes:             fileInfo.Size(),180		}181	} else {182		printWarnF("skipping file unknown extension: %s", name)183	}184185	return nil186}

Code quality findings 1

Avoid unchecked type assertions such as val.(T), which panic when the dynamic type differs; use the comma-ok form (v, ok := val.(T)) or keep the value in a concrete type.
warning safety unsafe-type-assertion
return extension.(string)

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.