summaryrefslogtreecommitdiff
path: root/src/usr.bin/nc/scripts/web
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/usr.bin/nc/scripts/web148
-rw-r--r--src/usr.bin/nc/scripts/webproxy138
-rw-r--r--src/usr.bin/nc/scripts/webrelay44
-rw-r--r--src/usr.bin/nc/scripts/websearch77
4 files changed, 407 insertions, 0 deletions
diff --git a/src/usr.bin/nc/scripts/web b/src/usr.bin/nc/scripts/web
new file mode 100644
index 0000000000..382b18e1e3
--- /dev/null
+++ b/src/usr.bin/nc/scripts/web
@@ -0,0 +1,148 @@
1#! /bin/sh
2## The web sucks. It is a mighty dismal kludge built out of a thousand
3## tiny dismal kludges all band-aided together, and now these bottom-line
4## clueless pinheads who never heard of "TCP handshake" want to run
5## *commerce* over the damn thing. Ye godz. Welcome to TV of the next
6## century -- six million channels of worthless shit to choose from, and
7## about as much security as today's cable industry!
8##
9## Having grown mightily tired of pain in the ass browsers, I decided
10## to build the minimalist client. It doesn't handle POST, just GETs, but
11## the majority of cgi forms handlers apparently ignore the method anyway.
12## A distinct advantage is that it *doesn't* pass on any other information
13## to the server, like Referer: or info about your local machine such as
14## Netscum tries to!
15##
16## Since the first version, this has become the *almost*-minimalist client,
17## but it saves a lot of typing now. And with netcat as its backend, it's
18## totally the balls. Don't have netcat? Get it here in /src/hacks!
19## _H* 950824, updated 951009 et seq.
20##
21## args: hostname [port]. You feed it the filename-parts of URLs.
22## In the loop, HOST, PORT, and SAVE do the right things; a null line
23## gets the previous spec again [useful for initial timeouts]; EOF to exit.
24## Relative URLs behave like a "cd" to wherever the last slash appears, or
25## just use the last component with the saved preceding "directory" part.
26## "\" clears the "filename" part and asks for just the "directory", and
27## ".." goes up one "directory" level while retaining the "filename" part.
28## Play around; you'll get used to it.
29
# ---------------------------------------------------------------------------
# Startup: validate arguments and initialise the session state.
#   $1  host to talk to (required)
#   $2  TCP port (optional, defaults to 80)
# ---------------------------------------------------------------------------
if test "$1" = "" ; then
  echo Needs hostname arg.
  exit 1
fi
umask 022

# optional PATH fixup
# PATH=${HOME}:${PATH} ; export PATH

# Pager used to display each fetched page; honour $PAGER, fall back to more.
test "${PAGER}" || PAGER=more
# Fetch command.  Expanded unquoted later on purpose so it splits into words.
BACKEND="nc -v -w 15"
host="$1"
port="${2:-80}"

# Request state: "directory" part (always ends in /), "file" part (may be
# empty), the combined spec, and the optional file that pages are saved to.
spec="/"
specD="/"
specF=''
saving=''

# Scratch file for the page being viewed.  It lives in a private directory
# created by mktemp, so nobody else can pre-create or symlink the path.  The
# file itself is deliberately NOT created here: its absence means "no page
# fetched yet".
TMPWEBDIR=$(mktemp -d /tmp/web.XXXXXXXXXX) || {
  echo "Can't create a temporary directory under /tmp"
  exit 1
}
TMPAGE="${TMPWEBDIR}/page"
# Remove the scratch directory on any exit, including interrupts.
trap 'rm -rf "${TMPWEBDIR}"' EXIT
trap 'exit 1' HUP INT TERM
57
# save_page LABEL
# Append the most recently fetched page to the save file, preceded by a
# "=== LABEL ===" marker so the URL survives a later de-HTML pass, and
# followed by a blank separator line.
save_page () {
  {
    printf '=== %s ===\n' "$1"
    cat "${TMPAGE}"
    echo ''
  } >> "${saving}"
}

# Interactive loop.  The prompt is the current "directory" + "file"; one line
# is read per pass and EOF ends the session.  printf replaces the
# non-portable "echo -n", and "read -r" keeps backslashes literal so that a
# lone "\" reaches the case statement instead of acting as a line
# continuation.
while printf '%s ' "${specD}${specF}" && read -r spec ; do
  case "$spec" in
    HOST)
      printf 'New host: '
      read -r host
      continue
      ;;
    PORT)
      printf 'New port: '
      read -r port
      continue
      ;;
    SAVE)
      printf 'Save file: '
      read -r saving
      # if a page has already been fetched, save it straight away
      if test "${saving}" && test -f "${TMPAGE}" ; then
        save_page "${host}:${specD}${specF}"
      fi
      continue
      ;;
    # leading slash: absolute path, replaces both "directory" and "file"
    /*)
      specF=${spec##*/}
      specD=${spec%/*}/
      ;;
    # embedded slash: relative path appended to the current "directory";
    # the "file" part may come out empty
    */*)
      specF=${spec##*/}
      specD=${specD}${spec%/*}/
      ;;
    # dotdot: drop the last "directory" component, keep the "file" part,
    # and just reprompt (the new prompt confirms what happened)
    ..)
      specD=$(printf '%s\n' "${specD}" | sed 's|\(.*/\)..*/|\1|')
      continue
      ;;
    # blank line: change nothing, which re-fetches the current spec
    '')
      ;;
    # "\" (or the older escaped form "\\"): clear the "file" part
    '\'|'\\')
      specF=''
      ;;
    '?')
      echo Help yourself. Read the script fer krissake.
      continue
      ;;
    # anything else is taken as a new "file" part
    *)
      specF=${spec}
      ;;
  esac

  # Assemble the request and send it as an HTTP simple-query.  Servers that
  # wait for an extra newline need
  #   (printf 'GET %s\n' "$spec" ; echo '') | $BACKEND ...
  # instead.
  spec="${specD}${specF}"
  printf 'GET %s\n' "${spec}" | $BACKEND "${host}" "${port}" > "${TMPAGE}"
  # PAGER stays unquoted so a value such as "less -R" still works.
  ${PAGER} "${TMPAGE}"

  if test "${saving}" ; then
    save_page "${host}:${spec}"
  fi
done
rm -f "${TMPAGE}"
exit 0
134
135#######
136# Encoding notes, finally from RFC 1738:
137# %XX -- hex-encode of special chars
138# allowed alphas in a URL: $_-.+!*'(),
139# relative names *not* described, but obviously used all over the place
140# transport://user:pass@host:port/path/name?query-string
141# wais: port 210, //host:port/database?search or /database/type/file?
142# cgi-bin/script?arg1=foo&arg2=bar&... scripts have to parse xxx&yyy&zzz
143# ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords!
144# local access-ctl files: ncsa: .htaccess ; cern: .www_acl
145#######
146# SEARCH ENGINES: fortunately, all are GET forms or at least work that way...
147# multi-word args for most cases: foo+bar
148# See 'websearch' for concise results of this research...
diff --git a/src/usr.bin/nc/scripts/webproxy b/src/usr.bin/nc/scripts/webproxy
new file mode 100644
index 0000000000..cee2d29fd1
--- /dev/null
+++ b/src/usr.bin/nc/scripts/webproxy
@@ -0,0 +1,138 @@
1#! /bin/sh
2## Web proxy, following the grand tradition of Web things being handled by
3## gross scripts. Uses netcat to listen on a high port [default 8000],
4## picks apart requests and sends them on to the right place. Point this
5## at the browser client machine you'll be coming from [to limit access to
6## only it], and point the browser's concept of an HTTP proxy to the
7## machine running this. Takes a single argument of the client that will
8## be using it, and rejects connections from elsewhere. LOGS the queries
9## to a configurable logfile, which can be an interesting read later on!
10## If the argument is "reset", the listener and logfile are cleaned up.
11##
12## This works surprisingly fast and well, for a shell script, although may
13## randomly fail when hammered by a browser that tries to open several
14## connections at once. Drop the "maximum connections" in your browser if
15## this is a problem.
16##
17## A more degenerate case of this, or preferably a small C program that
18## does the same thing under inetd, could handle a small site's worth of
19## proxy queries. Given the way browsers are evolving, proxies like this
20## can play an important role in protecting your own privacy.
21##
22## If you grabbed this in ASCII mode, search down for "eew" and make sure
23## the embedded-CR check is intact, or requests might hang.
24##
25## Doesn't handle POST forms. Who cares, if you're just watching HTTV?
26## Dumbness here has a highly desirable side effect: it only sends the first
27## GET line, since that's all you really ever need to send, and suppresses
28## the other somewhat revealing trash that most browsers insist on sending.
29
# set these as you wish: proxy port...
PORT=8000
# logfile spec: a real file or /dev/null if you don't care
LFILE="${0}.log"
# optional: where to dump connect info, so you can see if anything went wrong
# CFILE=${0}.conn
# optional extra args to the listener "nc", for instance "-s inside-net-addr"
# XNC=''

# This part must be quick so the next listener can start right away.
# Prelaunch check: with neither an argument nor an inherited CLIENT there is
# nothing to do.
if test -z "${1}${CLIENT}" ; then
  echo needs client hostname
  exit 1
fi

if test -z "${1}" ; then
  # No argument: nc ran us via -e to serve one connection.  Behave like inetd
  # and start the next listener NOW.  All three standard descriptors are
  # redirected so the listener holds no channel to the current connection.
  # It waits up to 10 minutes for a connection; -n skips DNS/port lookups
  # for speed.
  # XNC is left unquoted so several extra options split into words.  The
  # stderr target is quoted and defaults to /dev/null, so an empty CFILE or
  # one containing spaces cannot stop the listener from starting.
  nc -w 600 -n -l -p "$PORT" -e "$0" $XNC "$CLIENT" \
    < /dev/null > /dev/null 2> "${CFILE:-/dev/null}" &
fi
58
# From here on speed no longer matters.
umask 077

# An argument means an interactive launch ("reset" or a client hostname),
# not a connection handed over by nc.
if test "$1" ; then
  # Magic argument: delete the logs, then poke our own port so that the
  # waiting listener gets a connection.
  if test "$1" = "reset" ; then
    rm -f "$LFILE"
    test -f "$CFILE" && rm -f "$CFILE"
    nc -w 1 -n 127.0.0.1 "$PORT" < /dev/null > /dev/null 2>&1
    exit 0
  fi
  # nc -e re-executes "$0", so it has to name a real file.
  test ! -f "$0" && echo "Oops, cannot find my own corporeal being" && exit 1
  # Normal launch: pass the access-control settings on through the
  # environment.
  CLIENT="$1"
  export CLIENT
  test "$CFILE" || CFILE=/dev/null
  export CFILE
  touch "$CFILE"
  # Show what the previous run reported, if CFILE is a real file.
  if test -f "$CFILE" ; then
    echo "Last connection results:"
    cat "$CFILE"
  fi

  # Probe the client and pull its bare IP address out of the "[a.b.c.d]" in
  # nc's verbose output.  "sed -n ... p" prints only lines that contain a
  # bracketed address, so a lookup failure leaves CLIENT empty and is caught
  # below instead of turning nc's error text into the "address".
  CLIENT=$(nc -z -v -w 8 "$1" 22000 2>&1 |
    sed -n 's/.*\[\(..*\)\].*/\1/p' | head -n 1)
  test ! "$CLIENT" && echo "Can't find address of $1" && exit 1

  # Report the launch in the log and on the terminal.
  echo "=== Launch: $CLIENT" >> "$LFILE"
  echo "Proxy running -- will accept connections on $PORT from $CLIENT"
  echo " Logging queries to $LFILE"
  test -f "$CFILE" && echo " and connection fuckups to $CFILE"

  # Start the first listener with its output left on the terminal.  XNC is
  # unquoted on purpose so that it splits into separate options.
  nc -v -w 600 -n -l -p "$PORT" -e "$0" $XNC "$CLIENT" &
  exit 0
fi
98
# Reached only when nc runs us via -e for one connection: the browser's
# request is on stdin and our stdout goes back to the browser.  Expected
# input:
#   GET type://host.name:80/file/path HTTP/1.0
#   Header: ignored
#   Another-Header: ignored
#   <blank line>

# -r keeps backslashes literal and stops a trailing "\" from joining lines.
read -r x1 x2 x3 x4
echo "=== query: $x1 $x2 $x3 $x4" >> "$LFILE"
test "$x4" && echo "extra junk after request: $x4" && exit 0

# Strip any carriage return, the "scheme://" prefix and shell-questionable
# characters from the URL, leaving host[:port][/path].
hurl=$(printf '%s\n' "$x2" | tr -d '\r' |
  sed -e 's+.*//++' -e 's+[`'\''|$;<>{}\!*()"]++g')
# echo massaged hurl: $hurl >> "$LFILE"

# Split it.  The port is taken from the host[:port] part only, so a colon
# further along in the path cannot be mistaken for a port number.
hauth=${hurl%%/*}
hh=${hauth%%:*}
case "$hauth" in
  *:*) hp=${hauth##*:} ;;
  *)   hp='' ;;
esac
test "$hp" || hp=80
hf=${hurl#"$hauth"}
# A bare "http://host" has no path; ask for the root.
test "$hf" || hf=/
# echo total split: $hh : $hp : $hf >> "$LFILE"

# Read and log the rest of the request headers.  Only done when a protocol
# version was given; a simple-query has no headers.  Stops at the blank
# line.  Browsers end lines with CRLF, so a "blank" line arrives as a lone
# CR.  The CR is produced with printf rather than embedded literally, so
# the check survives file transfers that rewrite line endings.
CR=$(printf '\r')
if test "$x3" ; then
  while read -r xx ; do
    printf '%s\n' "${xx}" >> "$LFILE"
    test "${xx}" || break
    test "${xx}" = "${CR}" && break
  done
fi

# Reject non-GET requests only *after* the query has been logged.
test "$x1" != "GET" && echo "sorry, this proxy only does GETs" && exit 0

# Refuse to fetch from any host whose name contains "netscap".
if printf '%s\n' "$hh" | grep -Fiq netscap ; then
  echo "access to Netscam's servers <b>DENIED.</b>"
  exit 0
fi

# Forward the request as an HTTP simple-query, with a 30 second net-wait.
# For servers that need an extra newline, use
#   (printf '%s %s\n' "$x1" "$hf" ; echo '') | nc ...
printf '%s %s\n' "$x1" "$hf" | nc -w 30 "$hh" "$hp" 2> /dev/null || \
  echo "oops, can't get to $hh : $hp".
echo "sent \"$x1 $hf\" to $hh : $hp" >> "$LFILE"
exit 0
138
diff --git a/src/usr.bin/nc/scripts/webrelay b/src/usr.bin/nc/scripts/webrelay
new file mode 100644
index 0000000000..829a8b0708
--- /dev/null
+++ b/src/usr.bin/nc/scripts/webrelay
@@ -0,0 +1,44 @@
1#! /bin/sh
2## web relay -- a degenerate version of webproxy, usable with browsers that
3## don't understand proxies. This just forwards connections to a given server.
4## No query logging, no access control [although you can add it to XNC for
5## your own run], and full-URL links will undoubtedly confuse the browser
6## if it can't reach the server directly. This was actually written before
7## the full proxy was, and it shows.
8## The arguments in this case are the destination server and optional port.
9## Please flame pinheads who use self-referential absolute links.
10
# set these as you wish: proxy port...
PORT=8000
# any extra args to the listening "nc", for instance "-s inside-net-addr"
XNC=''

# This part must be quick so the next listener can start right away.
# With neither an argument nor an inherited RDEST there is nothing to
# relay to.
if test -z "${1}${RDEST}" ; then
  echo needs hostname
  exit 1
fi

if test -z "${1}" ; then
  # No argument: nc ran us via -e to serve one connection.  Start the next
  # listener NOW (it waits up to 10 minutes); XNC is unquoted on purpose so
  # that it splits into separate options.
  nc -w 600 -l -n -p "$PORT" -e "$0" $XNC < /dev/null > /dev/null 2>&1 &
  # Then become the relay for this connection.  The destination comes from
  # the environment exported by the setup run; the port falls back to 80 if
  # only RDEST was provided.
  exec nc -w 15 "$RDEST" "${RPORT:-80}"
fi
32
# Setup path (an argument was given); speed no longer matters here.
# $1 is the destination server, $2 its port (80 when omitted).  Both are
# exported so the per-connection runs started by nc -e can see them.
RDEST="$1"
RPORT="${2:-80}"
export RDEST
export RPORT

# Start the first listener with its error messages left on the terminal.
# -w 600 means it gives up after 10 minutes without a connection.
nc -v -w 600 -l -p "$PORT" -e "$0" $XNC < /dev/null > /dev/null &
echo "Relay to ${RDEST}:${RPORT} running -- point your browser here on port $PORT"
exit 0
diff --git a/src/usr.bin/nc/scripts/websearch b/src/usr.bin/nc/scripts/websearch
new file mode 100644
index 0000000000..60c3a3356a
--- /dev/null
+++ b/src/usr.bin/nc/scripts/websearch
@@ -0,0 +1,77 @@
1#! /bin/sh
2## Hit the major search engines. Hose the [large] output to a file!
3## autoconverts multiple arguments into the right format for given servers --
4## usually worda+wordb, with certain lame exceptions like dejanews.
5## Extracting and post-sorting the URLs is highly recommended...
6##
7## Altavista currently handled by a separate script; may merge at some point.
8##
9## _H* original 950824, updated 951218 and 960209
10
# At least one search word is required.
if test "${1}" = "" ; then
  echo 'Needs argument[s] to search for!'
  exit 1
fi

# PLUSARG: all words joined with "+" (worda+wordb), the form most engines
# take.  "$*" is quoted so a word such as "*" is not glob-expanded; runs of
# whitespace collapse to a single "+" and leading/trailing blanks are
# dropped.
PLUSARG=$(printf '%s\n' "$*" |
  sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
      -e 's/[[:space:]][[:space:]]*/+/g')
# PIPEARG: the same words joined with "|" instead.
PIPEARG=$(printf '%s\n' "${PLUSARG}" | sed 's/+/|/g')

# Scratch file for hand-built multi-line requests.  mktemp creates it with an
# unpredictable name, so the later "> $IFILE" cannot be aimed at somebody
# else's file.  The trap removes it on interrupt as well as normal exit.
IFILE=$(mktemp /tmp/.webq.XXXXXXXXXX) || {
  echo "Can't create a temporary file under /tmp"
  exit 1
}
trap 'rm -f "${IFILE}"' EXIT
trap 'exit 1' HUP INT TERM
15
16# Don't have "nc"? Get "netcat" from avian.org and add it to your toolkit.
# doquery PATH HOST PORT
# Send "GET PATH" as an HTTP simple-query to HOST:PORT through nc and write
# the reply to stdout (nc's -v chatter goes to stderr).
# Returns nc's exit status, or 2 when not called with exactly three
# arguments.
doquery () {
  if test $# -ne 3 ; then
    echo "usage: doquery path host port" >&2
    return 2
  fi
  # printf rather than echo, so the query string is sent exactly as given
  printf 'GET %s\n' "$1" | nc -v -i 1 -w 30 "$2" "$3"
}
20
# Run each engine in turn.  Every result set is preceded by a "=== Name ==="
# separator line; all but the first separator also get a blank line in
# front.  Port numbers are passed explicitly.

printf '%s\n' "=== Yahoo ==="
doquery "/bin/search?p=${PLUSARG}&n=300&w=w&s=a" search.yahoo.com 80

printf '\n%s\n' "=== Webcrawler ==="
doquery "/cgi-bin/WebQuery?searchText=${PLUSARG}&maxHits=300" webcrawler.com 80

# Infoseek wants "registration" before it does a real search, and its
# server insists on the extra newlines WITH CRLF pairs, so the plain query
#   doquery "WW/IS/Titles?qt=${PLUSARG}" www2.infoseek.com 80
# does not work.  Only a notice is printed.  For experiments, the basic idea
# and query format are:
#   echo "GET /WW/IS/Titles?qt=${PLUSARG}" > $IFILE
#   echo "" >> $IFILE
#   nc -v -w 30 guide-p.infoseek.com 80 < $IFILE
printf '\n%s\n' "=== Infoseek ==="
printf '%s\n' " is broken."

# this one is flaky; it might have to be run twice
printf '\n%s\n' "=== Opentext ==="
doquery "/omw/simplesearch?SearchFor=${PLUSARG}&mode=phrase" \
  search.opentext.com 80

# Inktomi only accepts hits=100 (anything else falls back to 30).  The sed
# filter drops the rating-dot image lines from the reply.
printf '\n%s\n' "=== Inktomi ==="
doquery "/query/?query=${PLUSARG}&hits=100" ink3.cs.berkeley.edu 1234 |
  sed '/^<IMG ALT.*inktomi.*\.gif">$/d'

# Dejanews limits hits to 120 and uses its own format: words joined with
# "|" and options joined with "+".
printf '\n%s\n' "=== Dejanews ==="
doquery "/cgi-bin/nph-dnquery?query=${PIPEARG}+maxhits=110+format=terse+defaultOp=AND" \
  smithers.dejanews.com 80
55
# OLD lycos query, which used to work until the service changed:
#   doquery "/cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=1" \
#     query5.lycos.cs.cmu.edu 80
# NEW lycos returns nothing unless a User-agent field is present, so a full
# multi-line request is built in the scratch file and fed to nc directly.
# 960206: webmaster@lycos notified
# 960208: reply received; this is how it is handled now:
{
  echo "GET /cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=terse&matchmode=and&minscore=.5 HTTP/1.x"
  echo "User-agent: *FUCK OFF*"
  echo "Why: go ask todd@pointcom.com (Todd Whitney)"
  echo ''
} > "$IFILE"
printf '\n%s\n' "=== Lycos ==="
nc -v -i 1 -w 30 twelve.srv.lycos.com 80 < "$IFILE"

# done: remove the scratch request file
rm -f "$IFILE"
exit 0
73
74# CURRENTLY BROKEN [?]
75# infoseek
76
77# some args need to be redone to ensure whatever "and" mode applies