diff options
Diffstat (limited to 'src/usr.bin/nc/scripts/websearch')
| -rw-r--r-- | src/usr.bin/nc/scripts/websearch | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/src/usr.bin/nc/scripts/websearch b/src/usr.bin/nc/scripts/websearch new file mode 100644 index 0000000000..60c3a3356a --- /dev/null +++ b/src/usr.bin/nc/scripts/websearch | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | #! /bin/sh | ||
| 2 | ## Hit the major search engines. Hose the [large] output to a file! | ||
| 3 | ## autoconverts multiple arguments into the right format for given servers -- | ||
| 4 | ## usually worda+wordb, with certain lame exceptions like dejanews. | ||
| 5 | ## Extracting and post-sorting the URLs is highly recommended... | ||
| 6 | ## | ||
| 7 | ## Altavista currently handled by a separate script; may merge at some point. | ||
| 8 | ## | ||
| 9 | ## _H* original 950824, updated 951218 and 960209 | ||
| 10 | |||
# Require at least one (non-empty) search term; usage errors go to stderr.
if [ -z "${1:-}" ]; then
  echo 'Needs argument[s] to search for!' >&2
  exit 1
fi

# PLUSARG: all search terms joined with "+".
# The work happens in the command-substitution subshell so nothing leaks out:
#   set -f      keeps terms such as '*' from being glob-expanded
#   set -- $*   re-splits on whitespace, so "a  b" and "a" "b" are equivalent
#   IFS=+       makes "$*" join the resulting words with "+"
PLUSARG=$(set -f; set -- $*; IFS=+; printf '%s\n' "$*")

# PIPEARG: same terms with every "+" turned into "|" (used for Dejanews).
PIPEARG=$(printf '%s\n' "${PLUSARG}" | sed 's/+/|/g')

# Scratch file for hand-built requests.  mktemp creates it atomically with an
# unpredictable name instead of the guessable /tmp/.webq.<pid>.
IFILE=$(mktemp /tmp/.webq.XXXXXXXXXX) || exit 1
# Remove the scratch file on any exit path, including interruption.
trap 'rm -f "$IFILE"' EXIT
trap 'exit 1' HUP INT TERM
| 15 | |||
| 16 | # Don't have "nc"? Get "netcat" from avian.org and add it to your toolkit. | ||
# doquery PATH HOST PORT
# Send a bare "GET PATH" request line to HOST:PORT through nc; whatever nc
# prints (the server's reply) goes to stdout.
#   $1 - request path including the query string
#   $2 - server host name
#   $3 - TCP port
# Returns the exit status of nc.
# printf is used instead of echo so that backslashes in the query are sent
# verbatim regardless of which /bin/sh runs the script.
doquery () {
  printf 'GET %s\n' "$1" | nc -v -i 1 -w 30 "$2" "$3"
}
| 20 | |||
| 21 | # changed since original: now supplying port numbers and separator lines... | ||
| 22 | |||
# Each engine gets a "=== Name ===" separator line followed by its raw reply.

# Yahoo
printf '%s\n' '=== Yahoo ==='
doquery "/bin/search?p=${PLUSARG}&n=300&w=w&s=a" search.yahoo.com 80

# Webcrawler
printf '\n%s\n' '=== Webcrawler ==='
doquery "/cgi-bin/WebQuery?searchText=${PLUSARG}&maxHits=300" webcrawler.com 80

# Infoseek: currently disabled.  The site wants "registration" before it does
# a real search, and the simple form
#   doquery "WW/IS/Titles?qt=${PLUSARG}" www2.infoseek.com 80
# does not work because the server wants the extra newlines, sent as CRLF
# pairs.  If you want to experiment, the basic idea and query format are:
#   echo "GET /WW/IS/Titles?qt=${PLUSARG}" > $IFILE
#   echo "" >> $IFILE
#   nc -v -w 30 guide-p.infoseek.com 80 < $IFILE
printf '\n%s\n' '=== Infoseek ==='
printf '%s\n' ' is broken.'

# Opentext: somewhat unreliable; the query may need to be run twice.
printf '\n%s\n' '=== Opentext ==='
doquery "/omw/simplesearch?SearchFor=${PLUSARG}&mode=phrase" search.opentext.com 80

# Inktomi: appears to accept only hits=100, otherwise it falls back to 30.
# The sed filter drops the rating-dot image lines from the reply.
printf '\n%s\n' '=== Inktomi ==='
doquery "/query/?query=${PLUSARG}&hits=100" ink3.cs.berkeley.edu 1234 |
  sed '/^<IMG ALT.*inktomi.*\.gif">$/d'

# Dejanews: limits hits to 120 and uses a nonstandard query format
# ("|"-separated terms, options appended with "+").
printf '\n%s\n' '=== Dejanews ==='
doquery "/cgi-bin/nph-dnquery?query=${PIPEARG}+maxhits=110+format=terse+defaultOp=AND" smithers.dejanews.com 80
| 55 | |||
# Lycos.
# The old form used to work until the server was changed; kept for reference:
#   doquery "/cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=1" \
#     query5.lycos.cs.cmu.edu 80
# The new server returns nothing unless a User-agent field is present in the
# query, so a full request with headers is written to $IFILE and fed to nc.
# History: 960206 complaint sent to webmaster@lycos; 960208 reply received,
# which led to the handling below.
{
  echo "GET /cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=terse&matchmode=and&minscore=.5 HTTP/1.x"
  echo "User-agent: *FUCK OFF*"
  echo "Why: go ask todd@pointcom.com (Todd Whitney)"
  # empty line terminates the header section
  echo ''
} > "$IFILE"

printf '\n%s\n' '=== Lycos ==='
nc -v -i 1 -w 30 twelve.srv.lycos.com 80 < "$IFILE"

# Clean up the scratch request file and finish successfully.
rm -f "$IFILE"
exit 0
| 73 | |||
| 74 | # CURRENTLY BROKEN [?] | ||
| 75 | # infoseek | ||
| 76 | |||
| 77 | # some args need to be redone to ensure whatever "and" mode applies | ||
