processor/detector_test.go GO 507 lines View on github.com → Search inside
1// SPDX-License-Identifier: MIT23package processor45import (6	"slices"7	"strings"8	"testing"9)1011func TestDetectLanguage(t *testing.T) {12	ProcessConstants()13	AllowListExtensions = []string{"css"}14	_, ext := DetectLanguage("example.black.css")1516	if ext != "css" {17		t.Error("Expected css got", ext)18	}19	AllowListExtensions = []string{}20}2122func TestDetectSheBangEmpty(t *testing.T) {23	ProcessConstants()2425	x, y := DetectSheBang([]byte{})2627	if x != "" || y == nil {28		t.Error("Expected no match got", x)29	}3031	x, y = DetectSheBang(nil)3233	if x != "" || y == nil {34		t.Error("Expected no match got", x)35	}36}3738func TestDetectSheBangPerl(t *testing.T) {39	ProcessConstants()4041	cases := []string{42		"#!/usr/bin/perl",43		"#!  /usr/bin/perl",44		"#!/usr/bin/perl -w",45		"#!/usr/bin/env perl",46		"#!  /usr/bin/env   perl",47		"#!/usr/bin/env perl -w",48		"#!  /usr/bin/env   perl   -w",49		"#!/opt/local/bin/perl",50		"#!/usr/bin/perl5",51	}5253	for _, c := range cases {54		x, y := DetectSheBang([]byte(c))5556		if x != "Perl" || y != nil {57			t.Error("Expected Perl match got", x, "for", c)58		}59	}60}6162func TestDetectSheBangPhp(t *testing.T) {63	ProcessConstants()6465	cases := []string{66		"#!/usr/bin/php5",67		"#!/usr/bin/php",68	}6970	for _, c := range cases {71		x, y := DetectSheBang([]byte(c))7273		if x != "PHP" || y != nil {74			t.Error("Expected PHP match got", x)75		}76	}77}7879func TestDetectSheBangPython(t *testing.T) {80	ProcessConstants()8182	cases := []string{83		"#!/usr/bin/python",84		"#!/usr/bin/python2",85		"#!/usr/bin/python3",86	}8788	for _, c := range cases {89		x, y := DetectSheBang([]byte(c))9091		if x != "Python" || y != nil {92			t.Error("Expected Python match got", x)93		}94	}95}9697func TestDetectSheBangAWK(t *testing.T) {98	ProcessConstants()99100	cases := []string{101		"#!/usr/bin/awk",102		"#!/usr/bin/gawk",103		"#!/usr/bin/mawk",104	}105106	for _, c := range cases {107		x, y := DetectSheBang([]byte(c))108109		if x != "AWK" || y != nil {110			t.Error("Expected AWK match got", x)111		}112	}113}114115func TestDetectSheBangCsh(t *testing.T) {116	ProcessConstants()117118	cases := []string{119		"#!/bin/csh",120		"#!/bin/tcsh",121	}122123	for _, c := range cases {124		x, y := DetectSheBang([]byte(c))125126		if x != "C Shell" || y != nil {127			t.Error("Expected C Shell match got", x)128		}129	}130}131132func TestDetectSheBangD(t *testing.T) {133	ProcessConstants()134135	cases := []string{136		"#!/usr/bin/env rdmd",137	}138139	for _, c := range cases {140		x, y := DetectSheBang([]byte(c))141142		if x != "D" || y != nil {143			t.Error("Expected D match got", x)144		}145	}146}147148func TestDetectSheBangNode(t *testing.T) {149	ProcessConstants()150151	cases := []string{152		"#!/usr/bin/env node",153		"#!/usr/bin/node",154	}155156	for _, c := range cases {157		x, y := DetectSheBang([]byte(c))158159		if x != "JavaScript" || y != nil {160			t.Error("Expected JavaScript match got", x)161		}162	}163}164165func TestDetectSheBangLisp(t *testing.T) {166	ProcessConstants()167168	cases := []string{169		"#!/usr/bin/env sbcl",170		"#!/usr/bin/sbcl",171	}172173	for _, c := range cases {174		x, y := DetectSheBang([]byte(c))175176		if x != "Lisp" || y != nil {177			t.Error("Expected Lisp match got", x)178		}179	}180}181182func TestDetectSheBangRacket(t *testing.T) {183	ProcessConstants()184185	cases := []string{186		"#!/usr/bin/env racket",187		"#!/usr/bin/racket",188	}189190	for _, c := range cases {191		x, y := DetectSheBang([]byte(c))192193		if x != "Racket" || y != nil {194			t.Error("Expected Racket match got", x)195		}196	}197}198199func TestDetectSheBangFish(t *testing.T) {200	ProcessConstants()201202	cases := []string{203		"#!/usr/bin/env fish",204		"#!/usr/bin/fish",205		"#!/bin/fish",206	}207208	for _, c := range cases {209		x, y := DetectSheBang([]byte(c))210211		if x != "Fish" || y != nil {212			t.Error("Expected Fish match got", x)213		}214	}215}216217func TestDetectSheBangShell(t *testing.T) {218	ProcessConstants()219220	cases := []string{221		"#!/usr/bin/env sh",222		"#!/bin/sh",223	}224225	for _, c := range cases {226		x, y := DetectSheBang([]byte(c))227228		if x != "Shell" || y != nil {229			t.Error("Expected Shell match got", x)230		}231	}232}233234func TestDetectSheBangRuby(t *testing.T) {235	ProcessConstants()236237	cases := []string{238		"#!/usr/bin/env ruby",239		"#!/usr/bin/ruby",240	}241242	for _, c := range cases {243		x, y := DetectSheBang([]byte(c))244245		if x != "Ruby" || y != nil {246			t.Error("Expected Ruby match got", x)247		}248	}249}250251func TestDetectSheBangLua(t *testing.T) {252	ProcessConstants()253254	cases := []string{255		"#!/usr/bin/env lua",256		"#!/usr/bin/lua",257	}258259	for _, c := range cases {260		x, y := DetectSheBang([]byte(c))261262		if x != "Lua" || y != nil {263			t.Error("Expected Lua match got", x)264		}265	}266}267268func TestDetectSheBangMultiple(t *testing.T) {269	ProcessConstants()270271	x, y := DetectSheBang([]byte(`#!/python/perl/ruby`))272273	if x != "Ruby" || y != nil {274		t.Error("Expected Ruby match got", x)275	}276}277278func TestDetectSheBangMultipleNewLine(t *testing.T) {279	ProcessConstants()280281	data := `#!/python/perl/ruby282python perl fish`283	x, y := DetectSheBang([]byte(data))284285	if x != "Ruby" || y != nil {286		t.Error("Expected Ruby match got", x)287	}288}289290func TestScanSheBang(t *testing.T) {291	cases := []string{292		"#!/usr/bin/perl",293		"#!  /usr/bin/perl",294		"#!/usr/bin/perl -w",295		"#!/usr/bin/env perl",296		"#!  /usr/bin/env   perl",297		"#!/usr/bin/env perl -w",298		"#!  /usr/bin/env   perl   -w",299		"#!/opt/local/bin/perl",300	}301302	for _, c := range cases {303		r, _ := scanForSheBang([]byte(c))304305		if r != "perl" {306			t.Errorf("Expected 'perl' got '%s' for %s", r, c)307		}308	}309}310311// Randomly try things to see what happens312func TestScanSheBangFuzz(t *testing.T) {313	for range 1000 {314		x, _ := scanForSheBang([]byte(randStringBytes(100)))315316		if x == "NEVERHAPPEN" {317			t.Errorf("Errr wot?")318		}319	}320}321322func TestCheckFullNameSheBang(t *testing.T) {323	ProcessConstants()324325	r, n := DetectLanguage("name")326327	if n != "name" {328		t.Error("Expected name to return")329	}330331	if r[0] != "#!" {332		t.Error("Expected #! return")333	}334}335336func TestCheckFullNameLicense(t *testing.T) {337	ProcessConstants()338339	r, n := DetectLanguage("license")340341	if n != "license" {342		t.Error("Expected name to return")343	}344345	if r[0] != "License" {346		t.Error("Expected License return")347	}348}349350func TestCheckFullNameXMake(t *testing.T) {351	ProcessConstants()352353	r, n := DetectLanguage("xmake.lua")354355	if n != "xmake.lua" {356		t.Error("Expected xmake.lua to return")357	}358359	if r[0] != "XMake" {360		t.Error("Expected XMake return")361	}362363	// count xmake.lua as a lua file if AllowListExtensions was set364	AllowListExtensions = []string{"lua"}365	r, n = DetectLanguage("xmake.lua")366367	if n != "lua" {368		t.Error("Expected lua to return")369	}370371	if r[0] != "Lua" {372		t.Error("Expected Lua return")373	}374	AllowListExtensions = []string{}375}376377func TestGuessLanguageCoq(t *testing.T) {378	res := DetermineLanguage("", "", []string{"Coq", "SystemVerilog"}, []byte(`Require Hypothesis Inductive`))379380	if res != "Coq" {381		t.Error("Expected guessed language to have been Coq got", res)382	}383}384385func TestGuessLanguageSystemVerilog(t *testing.T) {386	res := DetermineLanguage("", "", []string{"Coq", "SystemVerilog"}, []byte(`endmodule posedge edge always wire`))387388	if res != "SystemVerilog" {389		t.Error("Expected guessed language to have been SystemVerilog got", res)390	}391}392393func TestDetectLanguageIEC61131S7DCL(t *testing.T) {394	ProcessConstants()395396	possible, ext := DetectLanguage("types.s7dcl")397	if ext != "s7dcl" {398		t.Error("Expected s7dcl got", ext)399	}400	found := slices.Contains(possible, "IEC61131-3")401	if !found {402		t.Error("Expected IEC61131-3 got", possible)403	}404}405406func TestGuessLanguageIEC61131SCL(t *testing.T) {407	ProcessConstants()408409	content := []byte(`FUNCTION_BLOCK "MotorControl"410VAR_INPUT411    Start : BOOL;412END_VAR413BEGIN414    IF Start THEN415        Speed := 100;416    END_IF;417END_FUNCTION_BLOCK`)418419	res := DetermineLanguage("motor.scl", "", []string{"IEC61131-3", "Scallop"}, content)420	if res != "IEC61131-3" {421		t.Error("Expected guessed language to have been IEC61131-3 got", res)422	}423}424425func TestGuessLanguageScallopSCL(t *testing.T) {426	ProcessConstants()427428	content := []byte(`rel classes = {0, 1, 2}429rel count_enroll_cs_in_class(c, n) :-430  n = count(s: student(c, s), enroll(s, "CS") where c: classes(c))431query count_enroll_cs_in_class`)432433	res := DetermineLanguage("scallop.scl", "", []string{"IEC61131-3", "Scallop"}, content)434	if res != "Scallop" {435		t.Error("Expected guessed language to have been Scallop got", res)436	}437}438439func TestGuessLanguageLanguageSetNoPossible(t *testing.T) {440	res := DetermineLanguage("", "Java", []string{}, []byte(`endmodule posedge edge always wire`))441442	if res != "Java" {443		t.Error("Expected guessed language to have been Java got", res)444	}445}446447func TestGuessLanguageSingleLanguageSet(t *testing.T) {448	res := DetermineLanguage("", "Java", []string{"Rust"}, []byte(`endmodule posedge edge always wire`))449450	if res != "Rust" {451		t.Error("Expected guessed language to have been Rust got", res)452	}453}454455func TestGuessLanguageLanguageEmptyContent(t *testing.T) {456	res := DetermineLanguage("", "", []string{"Rust"}, []byte(``))457458	if res != "Rust" {459		t.Error("Expected guessed language to have been Rust got", res)460	}461}462463// Benchmarks below464465func BenchmarkScanSheBangFuzz(b *testing.B) {466	for i := 0; i < b.N; i++ {467		_, _ = scanForSheBang([]byte(randStringBytes(100)))468	}469}470471func BenchmarkScanSheBangReal(b *testing.B) {472	for i := 0; i < b.N; i++ {473		_, _ = scanForSheBang([]byte("#!  /usr/bin/env   perl   -w"))474	}475}476477func BenchmarkDetermineLanguage(b *testing.B) {478	ProcessConstants()479480	coqContent := []byte("Require Hypothesis Inductive\n")481	systemVerilogContent := []byte("endmodule posedge edge always wire\n")482	largeCoqContent := []byte("Require Hypothesis Inductive\n" + strings.Repeat("x", 25_000))483	largeSystemVerilogContent := []byte("endmodule posedge edge always wire\n" + strings.Repeat("y", 25_000))484	possibleLanguages := []string{"Coq", "SystemVerilog"}485486	benchmarks := []struct {487		name    string488		content []byte489	}{490		{name: "small_coq", content: coqContent},491		{name: "small_systemverilog", content: systemVerilogContent},492		{name: "large_coq_over_cutoff", content: largeCoqContent},493		{name: "large_systemverilog_over_cutoff", content: largeSystemVerilogContent},494	}495496	for _, benchmark := range benchmarks {497		b.Run(benchmark.name, func(b *testing.B) {498			b.ReportAllocs()499			b.SetBytes(int64(len(benchmark.content)))500501			for i := 0; i < b.N; i++ {502				_ = DetermineLanguage("", "", possibleLanguages, benchmark.content)503			}504		})505	}506}

Code quality findings 23

Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = scanForSheBang([]byte("#! /usr/bin/env perl -w"))
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_ = DetermineLanguage("", "", possibleLanguages, benchmark.content)
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, y := DetectSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
r, _ := scanForSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, _ := scanForSheBang([]byte(randStringBytes(100)))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
res := DetermineLanguage("", "", []string{"Rust"}, []byte(``))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
_, _ = scanForSheBang([]byte(randStringBytes(100)))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
_, _ = scanForSheBang([]byte("#! /usr/bin/env perl -w"))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
coqContent := []byte("Require Hypothesis Inductive\n")
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
systemVerilogContent := []byte("endmodule posedge edge always wire\n")
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
largeCoqContent := []byte("Require Hypothesis Inductive\n" + strings.Repeat("x", 25_000))

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.