String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
cmd, err := scanForSheBang([]byte(content))
1// SPDX-License-Identifier: MIT23package processor45import (6 "bytes"7 "cmp"8 "errors"9 "slices"10 "strings"11)1213var (14 errMissingShebang = errors.New("missing shebang")15 errUnknownShebang = errors.New("unknown shebang")16 errUnableToDetermineShebangCmd = errors.New("unable to determine shebang command")17)1819// DetectLanguage detects a language based on the filename returns the language extension and error20func DetectLanguage(name string) ([]string, string) {21 extension := ""2223 if len(AllowListExtensions) == 0 {24 // Check the full name for special languages such as xmake.lua, meson.build, ...25 lang, ok := FilenameToLanguage[strings.ToLower(name)]26 if ok {27 return []string{lang}, name28 }2930 t := strings.Count(name, ".")31 // If there is no . in the filename or it starts with one then check if #!32 if t == 0 || (name[0] == '.' && t == 1) {33 printWarnF("possible #! file: %s", name)3435 // No extension indicates possible #! so mark as such for processing36 return []string{SheBang}, name37 }38 }3940 // Lookup in case the full name matches41 language, ok := ExtensionToLanguage[strings.ToLower(name)]4243 // If no match check if we have a matching extension44 if !ok {45 extension = getExtension(name)46 language, ok = ExtensionToLanguage[extension]47 }4849 // Convert from d.ts to ts and check that in case of multiple extensions50 if !ok {51 extension = getExtension(extension)52 language = ExtensionToLanguage[extension]53 }5455 return language, extension56}5758// DetectSheBang given some content attempt to determine if it has a #! that maps to a known language and return the language59func DetectSheBang(content string) (string, error) {60 if !strings.HasPrefix(content, "#!") {61 return "", errMissingShebang62 }6364 index := strings.Index(content, "\n")6566 if index != -1 {67 content = content[:index]68 }6970 cmd, err := scanForSheBang([]byte(content))7172 if err != nil {73 return "", err74 }7576 for k, v := range ShebangLookup {77 if slices.Contains(v, cmd) {78 // detects both full path and env usage79 return k, nil80 }81 }8283 return "", errUnknownShebang84}8586func scanForSheBang(content []byte) (string, error) {87 state := 088 lastSlash := 08990 candidate1 := ""91 candidate2 := ""9293loop:94 for i := range content {95 switch state {96 case 0: // Deals with whitespace after #! and before first /97 if content[i] == '/' {98 lastSlash = i99 state = 1100 }101 case 1: // Once we found the first / keep going till we hit whitespace102 if content[i] == '/' {103 lastSlash = i104 }105106 // when at the end pull out the candidate107 if i == len(content)-1 {108 candidate1 = string(content[lastSlash+1 : i+1])109 }110111 // between last slash and here is the first candidate which is either env or Perl/PHP/Python etc..112 if isWhitespace(content[i]) {113 // mark from lastSlash to here as first argument114 candidate1 = string(content[lastSlash+1 : i])115 state = 2116 }117 case 2: // We have the first candidate, see if there is another118 // go till end of whitespace, mark that spot as new start119 if !isWhitespace(content[i]) {120 lastSlash = i121 state = 3122 }123 case 3:124 if i == len(content)-1 {125 candidate2 = string(content[lastSlash : i+1])126 }127128 if isWhitespace(content[i]) {129 candidate2 = string(content[lastSlash:i])130 state = 4131 }132 case 4:133 break loop134 }135 }136137 switch {138 case candidate1 == "env":139 return candidate2, nil140 case candidate1 != "":141 return candidate1, nil142 }143144 return "", errUnableToDetermineShebangCmd145}146147type languageGuess struct {148 Name string149 Count int150}151152// DetermineLanguage given a filename, fallback language, possible languages and content make a guess to the type.153// If multiple possible it will guess based on keywords similar to how https://github.com/vmchale/polyglot does154func DetermineLanguage(filename string, fallbackLanguage string, possibleLanguages []string, content []byte) string {155 // If being called through an API it's possible nothing is set here and as156 // such should just return as the Language value should have already been set157 if len(possibleLanguages) == 0 {158 return fallbackLanguage159 }160161 // There should only be two possibilities now, either we have a single fallbackLanguage162 // in which case we set it and return163 // or we have multiple in which case we try to determine it heuristically164 if len(possibleLanguages) == 1 {165 return possibleLanguages[0]166 }167168 startTime := makeTimestampNano()169170 toCheck := content171 if len(content) > 20_000 {172 toCheck = content[:20_000]173 }174175 primary := ""176177 toSort := make([]languageGuess, 0, len(possibleLanguages))178 for _, lan := range possibleLanguages {179 LanguageFeaturesMutex.Lock()180 langFeatures := LanguageFeatures[lan]181 LanguageFeaturesMutex.Unlock()182183 count := 0184 for _, key := range langFeatures.KeywordBytes {185 if bytes.Contains(toCheck, key) {186 count++187 }188 }189190 // if no features are found that means that this one is considered the primary191 // and as such the default fallback if we don't find a suitable number of matching192 // keywords193 // consider YAML files for example, where cloudformation files can also be YAML194 // YAML can have any form so it's not possible to say "this is a yaml file"195 // so we can only say "this is likely to be a cloudformation file", and as such196 // we need to handle a fallback case, which in this case is nothing197 if len(langFeatures.Keywords) == 0 {198 primary = lan199 }200201 toSort = append(toSort, languageGuess{Name: lan, Count: count})202 }203204 slices.SortFunc(toSort, func(a, b languageGuess) int {205 if order := cmp.Compare(b.Count, a.Count); order != 0 {206 return order207 }208 return strings.Compare(a.Name, b.Name)209 })210211 // fmt.Println(toSort)212 // fmt.Println(possibleLanguages)213 // fmt.Println(primary, toSort[0].Name, toSort[0].Count)214215 if primary != "" && len(toSort) != 0 {216 // OK at this point we have a primary, which means we want 3 or more matches to count as something else217 if toSort[0].Count < 3 {218 // we didn't find enough results, so lets return the primary in this case219 return primary220 }221 }222223 printWarnF("guessing language %s for file %s", toSort[0].Name, filename)224 printTraceF("nanoseconds to guess language: %s: %d", filename, makeTimestampNano()-startTime)225226 if len(toSort) != 0 {227 return toSort[0].Name228 }229230 return fallbackLanguage231}
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.