processor/detector_test.go GO 505 lines View on github.com → Search inside
1// SPDX-License-Identifier: MIT23package processor45import (6	"strings"7	"testing"8)910func TestDetectLanguage(t *testing.T) {11	ProcessConstants()12	AllowListExtensions = []string{"css"}13	_, ext := DetectLanguage("example.black.css")1415	if ext != "css" {16		t.Error("Expected css got", ext)17	}18	AllowListExtensions = []string{}19}2021func TestDetectSheBangEmpty(t *testing.T) {22	ProcessConstants()2324	x, y := DetectSheBang("")2526	if x != "" || y == nil {27		t.Error("Expected no match got", x)28	}29}3031func TestDetectSheBangPerl(t *testing.T) {32	ProcessConstants()3334	cases := []string{35		"#!/usr/bin/perl",36		"#!  /usr/bin/perl",37		"#!/usr/bin/perl -w",38		"#!/usr/bin/env perl",39		"#!  /usr/bin/env   perl",40		"#!/usr/bin/env perl -w",41		"#!  /usr/bin/env   perl   -w",42		"#!/opt/local/bin/perl",43		"#!/usr/bin/perl5",44	}4546	for _, c := range cases {47		x, y := DetectSheBang(c)4849		if x != "Perl" || y != nil {50			t.Error("Expected Perl match got", x, "for", c)51		}52	}53}5455func TestDetectSheBangPhp(t *testing.T) {56	ProcessConstants()5758	cases := []string{59		"#!/usr/bin/php5",60		"#!/usr/bin/php",61	}6263	for _, c := range cases {64		x, y := DetectSheBang(c)6566		if x != "PHP" || y != nil {67			t.Error("Expected PHP match got", x)68		}69	}70}7172func TestDetectSheBangPython(t *testing.T) {73	ProcessConstants()7475	cases := []string{76		"#!/usr/bin/python",77		"#!/usr/bin/python2",78		"#!/usr/bin/python3",79	}8081	for _, c := range cases {82		x, y := DetectSheBang(c)8384		if x != "Python" || y != nil {85			t.Error("Expected Python match got", x)86		}87	}88}8990func TestDetectSheBangAWK(t *testing.T) {91	ProcessConstants()9293	cases := []string{94		"#!/usr/bin/awk",95		"#!/usr/bin/gawk",96		"#!/usr/bin/mawk",97	}9899	for _, c := range cases {100		x, y := DetectSheBang(c)101102		if x != "AWK" || y != nil {103			t.Error("Expected AWK match got", x)104		}105	}106}107108func TestDetectSheBangCsh(t *testing.T) {109	ProcessConstants()110111	cases := 
[]string{112		"#!/bin/csh",113		"#!/bin/tcsh",114	}115116	for _, c := range cases {117		x, y := DetectSheBang(c)118119		if x != "C Shell" || y != nil {120			t.Error("Expected C Shell match got", x)121		}122	}123}124125func TestDetectSheBangD(t *testing.T) {126	ProcessConstants()127128	cases := []string{129		"#!/usr/bin/env rdmd",130	}131132	for _, c := range cases {133		x, y := DetectSheBang(c)134135		if x != "D" || y != nil {136			t.Error("Expected D match got", x)137		}138	}139}140141func TestDetectSheBangNode(t *testing.T) {142	ProcessConstants()143144	cases := []string{145		"#!/usr/bin/env node",146		"#!/usr/bin/node",147	}148149	for _, c := range cases {150		x, y := DetectSheBang(c)151152		if x != "JavaScript" || y != nil {153			t.Error("Expected JavaScript match got", x)154		}155	}156}157158func TestDetectSheBangLisp(t *testing.T) {159	ProcessConstants()160161	cases := []string{162		"#!/usr/bin/env sbcl",163		"#!/usr/bin/sbcl",164	}165166	for _, c := range cases {167		x, y := DetectSheBang(c)168169		if x != "Lisp" || y != nil {170			t.Error("Expected Lisp match got", x)171		}172	}173}174175func TestDetectSheBangRacket(t *testing.T) {176	ProcessConstants()177178	cases := []string{179		"#!/usr/bin/env racket",180		"#!/usr/bin/racket",181	}182183	for _, c := range cases {184		x, y := DetectSheBang(c)185186		if x != "Racket" || y != nil {187			t.Error("Expected Racket match got", x)188		}189	}190}191192func TestDetectSheBangFish(t *testing.T) {193	ProcessConstants()194195	cases := []string{196		"#!/usr/bin/env fish",197		"#!/usr/bin/fish",198		"#!/bin/fish",199	}200201	for _, c := range cases {202		x, y := DetectSheBang(c)203204		if x != "Fish" || y != nil {205			t.Error("Expected Fish match got", x)206		}207	}208}209210func TestDetectSheBangShell(t *testing.T) {211	ProcessConstants()212213	cases := []string{214		"#!/usr/bin/env sh",215		"#!/bin/sh",216	}217218	for _, c := range cases {219		x, y := DetectSheBang(c)220221		if x != "Shell" || y != nil {222			
t.Error("Expected Shell match got", x)223		}224	}225}226227func TestDetectSheBangRuby(t *testing.T) {228	ProcessConstants()229230	cases := []string{231		"#!/usr/bin/env ruby",232		"#!/usr/bin/ruby",233	}234235	for _, c := range cases {236		x, y := DetectSheBang(c)237238		if x != "Ruby" || y != nil {239			t.Error("Expected Ruby match got", x)240		}241	}242}243244func TestDetectSheBangLua(t *testing.T) {245	ProcessConstants()246247	cases := []string{248		"#!/usr/bin/env lua",249		"#!/usr/bin/lua",250	}251252	for _, c := range cases {253		x, y := DetectSheBang(c)254255		if x != "Lua" || y != nil {256			t.Error("Expected Lua match got", x)257		}258	}259}260261func TestDetectSheBangMultiple(t *testing.T) {262	ProcessConstants()263264	x, y := DetectSheBang(`#!/python/perl/ruby`)265266	if x != "Ruby" || y != nil {267		t.Error("Expected Ruby match got", x)268	}269}270271func TestDetectSheBangMultipleNewLine(t *testing.T) {272	ProcessConstants()273274	x, y := DetectSheBang(`#!/python/perl/ruby275python perl fish`)276277	if x != "Ruby" || y != nil {278		t.Error("Expected Ruby match got", x)279	}280}281282func TestScanSheBang(t *testing.T) {283	cases := []string{284		"#!/usr/bin/perl",285		"#!  /usr/bin/perl",286		"#!/usr/bin/perl -w",287		"#!/usr/bin/env perl",288		"#!  /usr/bin/env   perl",289		"#!/usr/bin/env perl -w",290		"#!  
/usr/bin/env   perl   -w",291		"#!/opt/local/bin/perl",292	}293294	for _, c := range cases {295		r, _ := scanForSheBang([]byte(c))296297		if r != "perl" {298			t.Errorf("Expected 'perl' got '%s' for %s", r, c)299		}300	}301}302303// Randomly try things to see what happens304func TestScanSheBangFuzz(t *testing.T) {305	for i := 0; i < 1000; i++ {306		x, _ := scanForSheBang([]byte(randStringBytes(100)))307308		if x == "NEVERHAPPEN" {309			t.Errorf("Errr wot?")310		}311	}312}313314func TestCheckFullNameSheBang(t *testing.T) {315	ProcessConstants()316317	r, n := DetectLanguage("name")318319	if n != "name" {320		t.Error("Expected name to return")321	}322323	if r[0] != "#!" {324		t.Error("Expected #! return")325	}326}327328func TestCheckFullNameLicense(t *testing.T) {329	ProcessConstants()330331	r, n := DetectLanguage("license")332333	if n != "license" {334		t.Error("Expected name to return")335	}336337	if r[0] != "License" {338		t.Error("Expected License return")339	}340}341342func TestCheckFullNameXMake(t *testing.T) {343	ProcessConstants()344345	r, n := DetectLanguage("xmake.lua")346347	if n != "xmake.lua" {348		t.Error("Expected xmake.lua to return")349	}350351	if r[0] != "XMake" {352		t.Error("Expected XMake return")353	}354355	// count xmake.lua as a lua file if AllowListExtensions was set356	AllowListExtensions = []string{"lua"}357	r, n = DetectLanguage("xmake.lua")358359	if n != "lua" {360		t.Error("Expected lua to return")361	}362363	if r[0] != "Lua" {364		t.Error("Expected Lua return")365	}366	AllowListExtensions = []string{}367}368369func TestGuessLanguageCoq(t *testing.T) {370	res := DetermineLanguage("", "", []string{"Coq", "SystemVerilog"}, []byte(`Require Hypothesis Inductive`))371372	if res != "Coq" {373		t.Error("Expected guessed language to have been Coq got", res)374	}375}376377func TestGuessLanguageSystemVerilog(t *testing.T) {378	res := DetermineLanguage("", "", []string{"Coq", "SystemVerilog"}, []byte(`endmodule posedge edge always wire`))379380	if 
res != "SystemVerilog" {381		t.Error("Expected guessed language to have been SystemVerilog got", res)382	}383}384385func TestDetectLanguageIEC61131S7DCL(t *testing.T) {386	ProcessConstants()387388	possible, ext := DetectLanguage("types.s7dcl")389	if ext != "s7dcl" {390		t.Error("Expected s7dcl got", ext)391	}392	found := false393	for _, language := range possible {394		if language == "IEC61131-3" {395			found = true396			break397		}398	}399	if !found {400		t.Error("Expected IEC61131-3 got", possible)401	}402}403404func TestGuessLanguageIEC61131SCL(t *testing.T) {405	ProcessConstants()406407	content := []byte(`FUNCTION_BLOCK "MotorControl"408VAR_INPUT409    Start : BOOL;410END_VAR411BEGIN412    IF Start THEN413        Speed := 100;414    END_IF;415END_FUNCTION_BLOCK`)416417	res := DetermineLanguage("motor.scl", "", []string{"IEC61131-3", "Scallop"}, content)418	if res != "IEC61131-3" {419		t.Error("Expected guessed language to have been IEC61131-3 got", res)420	}421}422423func TestGuessLanguageScallopSCL(t *testing.T) {424	ProcessConstants()425426	content := []byte(`rel classes = {0, 1, 2}427rel count_enroll_cs_in_class(c, n) :-428  n = count(s: student(c, s), enroll(s, "CS") where c: classes(c))429query count_enroll_cs_in_class`)430431	res := DetermineLanguage("scallop.scl", "", []string{"IEC61131-3", "Scallop"}, content)432	if res != "Scallop" {433		t.Error("Expected guessed language to have been Scallop got", res)434	}435}436437func TestGuessLanguageLanguageSetNoPossible(t *testing.T) {438	res := DetermineLanguage("", "Java", []string{}, []byte(`endmodule posedge edge always wire`))439440	if res != "Java" {441		t.Error("Expected guessed language to have been Java got", res)442	}443}444445func TestGuessLanguageSingleLanguageSet(t *testing.T) {446	res := DetermineLanguage("", "Java", []string{"Rust"}, []byte(`endmodule posedge edge always wire`))447448	if res != "Rust" {449		t.Error("Expected guessed language to have been Rust got", res)450	}451}452453func 
TestGuessLanguageLanguageEmptyContent(t *testing.T) {454	res := DetermineLanguage("", "", []string{"Rust"}, []byte(``))455456	if res != "Rust" {457		t.Error("Expected guessed language to have been Rust got", res)458	}459}460461// Benchmarks below462463func BenchmarkScanSheBangFuzz(b *testing.B) {464	for i := 0; i < b.N; i++ {465		_, _ = scanForSheBang([]byte(randStringBytes(100)))466	}467}468469func BenchmarkScanSheBangReal(b *testing.B) {470	for i := 0; i < b.N; i++ {471		_, _ = scanForSheBang([]byte("#!  /usr/bin/env   perl   -w"))472	}473}474475func BenchmarkDetermineLanguage(b *testing.B) {476	ProcessConstants()477478	coqContent := []byte("Require Hypothesis Inductive\n")479	systemVerilogContent := []byte("endmodule posedge edge always wire\n")480	largeCoqContent := []byte("Require Hypothesis Inductive\n" + strings.Repeat("x", 25_000))481	largeSystemVerilogContent := []byte("endmodule posedge edge always wire\n" + strings.Repeat("y", 25_000))482	possibleLanguages := []string{"Coq", "SystemVerilog"}483484	benchmarks := []struct {485		name    string486		content []byte487	}{488		{name: "small_coq", content: coqContent},489		{name: "small_systemverilog", content: systemVerilogContent},490		{name: "large_coq_over_cutoff", content: largeCoqContent},491		{name: "large_systemverilog_over_cutoff", content: largeSystemVerilogContent},492	}493494	for _, benchmark := range benchmarks {495		b.Run(benchmark.name, func(b *testing.B) {496			b.ReportAllocs()497			b.SetBytes(int64(len(benchmark.content)))498499			for i := 0; i < b.N; i++ {500				_ = DetermineLanguage("", "", possibleLanguages, benchmark.content)501			}502		})503	}504}

Code quality findings 10

Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_, _ = scanForSheBang([]byte("#!  /usr/bin/env   perl   -w"))
Blank identifier discarding results; verify intentional ignoring of return values
warning correctness blank-identifier-discard
_ = DetermineLanguage("", "", possibleLanguages, benchmark.content)
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
r, _ := scanForSheBang([]byte(c))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
x, _ := scanForSheBang([]byte(randStringBytes(100)))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
res := DetermineLanguage("", "", []string{"Rust"}, []byte(``))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
_, _ = scanForSheBang([]byte(randStringBytes(100)))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
_, _ = scanForSheBang([]byte("#!  /usr/bin/env   perl   -w"))
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
coqContent := []byte("Require Hypothesis Inductive\n")
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
systemVerilogContent := []byte("endmodule posedge edge always wire\n")
String to byte slice conversion inside loop allocates a new slice each iteration; convert once before the loop
info correctness string-to-byte-in-loop
largeCoqContent := []byte("Require Hypothesis Inductive\n" + strings.Repeat("x", 25_000))

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.