Blank identifier discarding results; verify intentional ignoring of return values
content, _, _ = bytes.Cut(content, []byte{'\n'})
1// SPDX-License-Identifier: MIT23package processor45import (6 "bytes"7 "cmp"8 "errors"9 "slices"10 "strings"11)1213var (14 errMissingShebang = errors.New("missing shebang")15 errUnknownShebang = errors.New("unknown shebang")16 errUnableToDetermineShebangCmd = errors.New("unable to determine shebang command")17)1819// DetectLanguage detects a language based on the filename returns the language extension and error20func DetectLanguage(name string) ([]string, string) {21 extension := ""2223 if len(AllowListExtensions) == 0 {24 // Check the full name for special languages such as xmake.lua, meson.build, ...25 lang, ok := FilenameToLanguage[strings.ToLower(name)]26 if ok {27 return []string{lang}, name28 }2930 t := strings.Count(name, ".")31 // If there is no . in the filename or it starts with one then check if #!32 if t == 0 || (name[0] == '.' && t == 1) {33 printWarnF("possible #! file: %s", name)3435 // No extension indicates possible #! so mark as such for processing36 return []string{SheBang}, name37 }38 }3940 // Lookup in case the full name matches41 language, ok := ExtensionToLanguage[strings.ToLower(name)]4243 // If no match check if we have a matching extension44 if !ok {45 extension = getExtension(name)46 language, ok = ExtensionToLanguage[extension]47 }4849 // Convert from d.ts to ts and check that in case of multiple extensions50 if !ok {51 extension = getExtension(extension)52 language = ExtensionToLanguage[extension]53 }5455 return language, extension56}5758// DetectSheBang given some content attempt to determine if it has a #! that maps to a known language and return the language59func DetectSheBang(content []byte) (string, error) {60 if !bytes.HasPrefix(content, []byte("#!")) {61 return "", errMissingShebang62 }6364 content, _, _ = bytes.Cut(content, []byte{'\n'})6566 cmd, err := scanForSheBang(content)67 if err != nil {68 return "", err69 }7071 for k, v := range ShebangLookup {72 if slices.Contains(v, cmd) {73 // detects both full path and env usage74 return k, nil75 }76 }7778 return "", errUnknownShebang79}8081func scanForSheBang(content []byte) (string, error) {82 state := 083 lastSlash := 08485 candidate1 := ""86 candidate2 := ""8788loop:89 for i := range content {90 switch state {91 case 0: // Deals with whitespace after #! and before first /92 if content[i] == '/' {93 lastSlash = i94 state = 195 }96 case 1: // Once we found the first / keep going till we hit whitespace97 if content[i] == '/' {98 lastSlash = i99 }100101 // when at the end pull out the candidate102 if i == len(content)-1 {103 candidate1 = string(content[lastSlash+1 : i+1])104 }105106 // between last slash and here is the first candidate which is either env or Perl/PHP/Python etc..107 if isWhitespace(content[i]) {108 // mark from lastSlash to here as first argument109 candidate1 = string(content[lastSlash+1 : i])110 state = 2111 }112 case 2: // We have the first candidate, see if there is another113 // go till end of whitespace, mark that spot as new start114 if !isWhitespace(content[i]) {115 lastSlash = i116 state = 3117 }118 case 3:119 if i == len(content)-1 {120 candidate2 = string(content[lastSlash : i+1])121 }122123 if isWhitespace(content[i]) {124 candidate2 = string(content[lastSlash:i])125 state = 4126 }127 case 4:128 break loop129 }130 }131132 switch {133 case candidate1 == "env":134 return candidate2, nil135 case candidate1 != "":136 return candidate1, nil137 }138139 return "", errUnableToDetermineShebangCmd140}141142type languageGuess struct {143 Name string144 Count int145}146147// DetermineLanguage given a filename, fallback language, possible languages and content make a guess to the type.148// If multiple possible it will guess based on keywords similar to how https://github.com/vmchale/polyglot does149func DetermineLanguage(filename string, fallbackLanguage string, possibleLanguages []string, content []byte) string {150 // If being called through an API it's possible nothing is set here and as151 // such should just return as the Language value should have already been set152 if len(possibleLanguages) == 0 {153 return fallbackLanguage154 }155156 // There should only be two possibilities now, either we have a single fallbackLanguage157 // in which case we set it and return158 // or we have multiple in which case we try to determine it heuristically159 if len(possibleLanguages) == 1 {160 return possibleLanguages[0]161 }162163 startTime := makeTimestampNano()164165 toCheck := content166 if len(content) > 20_000 {167 toCheck = content[:20_000]168 }169170 primary := ""171172 toSort := make([]languageGuess, 0, len(possibleLanguages))173 for _, lan := range possibleLanguages {174 LanguageFeaturesMutex.Lock()175 langFeatures := LanguageFeatures[lan]176 LanguageFeaturesMutex.Unlock()177178 count := 0179 for _, key := range langFeatures.KeywordBytes {180 if bytes.Contains(toCheck, key) {181 count++182 }183 }184185 // if no features are found that means that this one is considered the primary186 // and as such the default fallback if we don't find a suitable number of matching187 // keywords188 // consider YAML files for example, where cloudformation files can also be YAML189 // YAML can have any form so it's not possible to say "this is a yaml file"190 // so we can only say "this is likely to be a cloudformation file", and as such191 // we need to handle a fallback case, which in this case is nothing192 if len(langFeatures.Keywords) == 0 {193 primary = lan194 }195196 toSort = append(toSort, languageGuess{Name: lan, Count: count})197 }198199 slices.SortFunc(toSort, func(a, b languageGuess) int {200 if order := cmp.Compare(b.Count, a.Count); order != 0 {201 return order202 }203 return strings.Compare(a.Name, b.Name)204 })205206 if primary != "" && len(toSort) != 0 {207 // OK at this point we have a primary, which means we want 3 or more matches to count as something else208 if toSort[0].Count < 3 {209 // we didn't find enough results, so lets return the primary in this case210 return primary211 }212 }213214 printWarnF("guessing language %s for file %s", toSort[0].Name, filename)215 printTraceF("nanoseconds to guess language: %s: %d", filename, makeTimestampNano()-startTime)216217 if len(toSort) != 0 {218 return toSort[0].Name219 }220221 return fallbackLanguage222}
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.