url_scrapping.sh - Usage: archive <domain> or bing <domain>…

/Recon/url_scrapping.sh

https://github.com/adon90/pentest_compilation · Shell · 27 lines · 15 code · 11 blank · 1 comment · 0 complexity · f303fb73e46bdbfcba799609e3ba9a33 MD5 · raw file


# archive <domain>
# Queries the Wayback Machine CDX search endpoint for captures whose URL
# starts with "<domain>/" (matchType=prefix, collapsed by urlkey, limit
# 100000) and prints the captured URLs.
# Arguments: $1 - domain (or URL prefix) to look up; inserted into the query
#                 string as-is, so it must already be URL-safe
# Outputs:   one archived URL per line on stdout
# Returns:   2 when no domain is given; otherwise the status of the last
#            filter in the pipeline (non-zero when no URL was printed)
archive() {
	if [[ -z "${1:-}" ]]; then
		printf 'Usage: archive <domain>\n' >&2
		return 2
	fi

	# The static URL parts stay single-quoted (they contain '!' and '&');
	# only the domain is double-quoted so that whitespace or glob characters
	# in it cannot split the URL into several curl arguments.
	# The first grep keeps the first field of every JSON row; the second one
	# drops the header row, whose first field is the column name "original".
	curl -s 'http://web.archive.org/cdx/search?url='"$1"'%2F&matchType=prefix&collapse=urlkey&output=json&fl=original%2Cmimetype%2Ctimestamp%2Cendtimestamp%2Cgroupcount%2Cuniqcount&filter=!statuscode%3A%5B45%5D..&limit=100000&_=1532513891577' --compressed \
		| grep -Po "(?<=\[\").*?(?=\")" \
		| grep -vxF 'original'
}

# bing <domain>
# Fetches one Bing results page for the query "domain:<domain>" (first=1)
# and prints the result links found in it.
# Arguments: $1 - domain to search for; inserted into the query string as-is
# Outputs:   one http(s) URL per line on stdout
# Returns:   2 when no domain is given; otherwise the status of the last
#            grep in the pipeline (non-zero when nothing matched)
bing() {
	if [[ -z "${1:-}" ]]; then
		printf 'Usage: bing <domain>\n' >&2
		return 2
	fi

	# 1. pull the href of every anchor that carries an h= attribute
	# 2. drop links mentioning microsoft / bing / pointdecontact; note this
	#    also drops any result whose URL merely contains one of those words
	# 3. keep only the part of each link starting at http / https
	# grep -E replaces the obsolescent egrep, which newer GNU grep warns about.
	curl "https://www.bing.com/search?q=domain%3a$1&first=1" -s \
		| grep -Po "(?<=<a href=\").*?(?=\" h=)" \
		| grep -Ev "microsoft|bing|pointdecontact" \
		| grep -Po "https?.*"
}


# google <domain>
# Requests a Google results page for "site:<domain>" (num=60) through a
# SOCKS5 proxy and prints the text found right after each iUh30"> marker
# in the response.
# Globals:   GOOGLE_PROXY (read, optional) - proxy URL handed to curl -x;
#            defaults to socks5://127.0.0.1:1337
# Arguments: $1 - domain to search for; inserted into the query string as-is
# Outputs:   one match per line on stdout
# Returns:   2 when no domain is given; otherwise the status of grep
#            (non-zero when nothing matched)
google() {
	if [[ -z "${1:-}" ]]; then
		printf 'Usage: google <domain>\n' >&2
		return 2
	fi

	local proxy="${GOOGLE_PROXY:-socks5://127.0.0.1:1337}"

	# No -X GET: curl already issues a GET when no request body is given.
	# NOTE(review): -k disables TLS certificate verification; kept because the
	# original relied on it, but confirm it is really needed with the proxy.
	curl -i -s -k \
		-H "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0" \
		-H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" \
		-H 'Connection: close' \
		-x "$proxy" \
		"https://www.google.to/search?q=site:$1&num=60" \
		| grep -Po "(?<=iUh30\">).*?(?=<)"
}

# duckduckgo <domain>
# Two-step scrape: first fetch the api.duckduckgo.com page for
# "site:<domain>" and take the last vqd=... value found in it, then call
# duckduckgo.com/d.js with that value and print every "u":"..." field of
# the response.
# Arguments: $1 - domain to search for; inserted into the query string as-is
# Outputs:   one URL per line on stdout
# Returns:   2 when no domain is given, 1 when no vqd token could be
#            extracted; otherwise the status of the final grep
duckduckgo() {
	if [[ -z "${1:-}" ]]; then
		printf 'Usage: duckduckgo <domain>\n' >&2
		return 2
	fi

	local user_agent='Mozilla/5.0 (Windows NT 10.0; WOW64; rv:67.0) Gecko/20100101 Firefox/67.0'
	# Declared local so the token does not leak into the calling shell.
	local vqd

	vqd=$(curl -A "$user_agent" "https://api.duckduckgo.com/?q=site%3A$1" -s \
		| grep -Po "(?<=vqd=).*?(?=&)" \
		| tail -1)

	# Without a token the second request cannot succeed, so stop here
	# instead of sending a request with an empty vqd parameter.
	if [[ -z "$vqd" ]]; then
		printf 'duckduckgo: could not obtain a vqd token for %s\n' "$1" >&2
		return 1
	fi

	curl -A "$user_agent" "https://duckduckgo.com/d.js?q=site%3A$1&vqd=$vqd" -s \
		| grep -Po "(?<=u\":\").*?(?=\")"
}


# Usage: archive <domain> or bing <domain> or google <domain> or duckduckgo <domain> (google is supposed to be launched with doxycannon to bypass the Google captcha)