diff options
Diffstat (limited to '')
-rw-r--r-- | src/usr.bin/nc/scripts/web | 148 | ||||
-rw-r--r-- | src/usr.bin/nc/scripts/webproxy | 138 | ||||
-rw-r--r-- | src/usr.bin/nc/scripts/webrelay | 44 | ||||
-rw-r--r-- | src/usr.bin/nc/scripts/websearch | 77 |
4 files changed, 407 insertions, 0 deletions
diff --git a/src/usr.bin/nc/scripts/web b/src/usr.bin/nc/scripts/web new file mode 100644 index 0000000000..382b18e1e3 --- /dev/null +++ b/src/usr.bin/nc/scripts/web | |||
@@ -0,0 +1,148 @@ | |||
1 | #! /bin/sh | ||
2 | ## The web sucks. It is a mighty dismal kludge built out of a thousand | ||
3 | ## tiny dismal kludges all band-aided together, and now these bottom-line | ||
4 | ## clueless pinheads who never heard of "TCP handshake" want to run | ||
5 | ## *commerce* over the damn thing. Ye godz. Welcome to TV of the next | ||
6 | ## century -- six million channels of worthless shit to choose from, and | ||
7 | ## about as much security as today's cable industry! | ||
8 | ## | ||
9 | ## Having grown mightily tired of pain in the ass browsers, I decided | ||
10 | ## to build the minimalist client. It doesn't handle POST, just GETs, but | ||
11 | ## the majority of cgi forms handlers apparently ignore the method anyway. | ||
12 | ## A distinct advantage is that it *doesn't* pass on any other information | ||
13 | ## to the server, like Referer: or info about your local machine such as | ||
14 | ## Netscum tries to! | ||
15 | ## | ||
16 | ## Since the first version, this has become the *almost*-minimalist client, | ||
17 | ## but it saves a lot of typing now. And with netcat as its backend, it's | ||
18 | ## totally the balls. Don't have netcat? Get it here in /src/hacks! | ||
19 | ## _H* 950824, updated 951009 et seq. | ||
20 | ## | ||
21 | ## args: hostname [port]. You feed it the filename-parts of URLs. | ||
22 | ## In the loop, HOST, PORT, and SAVE do the right things; a null line | ||
23 | ## gets the previous spec again [useful for initial timeouts]; EOF to exit. | ||
24 | ## Relative URLs behave like a "cd" to wherever the last slash appears, or | ||
25 | ## just use the last component with the saved preceding "directory" part. | ||
26 | ## "\" clears the "filename" part and asks for just the "directory", and | ||
27 | ## ".." goes up one "directory" level while retaining the "filename" part. | ||
28 | ## Play around; you'll get used to it. | ||
29 | |||
# args: hostname [port].  Refuse to start without a host to talk to.
if test "$1" = "" ; then
  echo Needs hostname arg.
  exit 1
fi
umask 022

# optional PATH fixup
# PATH=${HOME}:${PATH} ; export PATH

# Pager for fetched pages; honor the user's $PAGER when it is set.
test "${PAGER}" || PAGER=more
# Command (with options) used to open the connection; expanded unquoted on
# purpose so that the options split into separate words.
BACKEND="nc -v -w 15"
host="$1"
port="80"
if test "$2" != "" ; then
  port="$2"
fi

# Prompt state: specD is the current "directory" (always ends in a slash),
# specF the current "file" (may be empty), spec the last thing typed.
spec="/"
specD="/"
specF=''
# Name of the file pages are appended to, or empty when not saving.
saving=''

# Scratch space for the page being viewed.  A private directory made by
# mktemp replaces the old guessable /tmp/web$$ name, so nobody can plant a
# file or symlink under the name we are about to write to.  The page file
# itself is deliberately NOT created here: its existence still means "a page
# has been fetched".
WEBTMPDIR=$(mktemp -d "${TMPDIR:-/tmp}/web.XXXXXXXXXX") || {
  echo "Can't create a scratch directory"
  exit 1
}
TMPAGE="${WEBTMPDIR}/page"
# Clean up however we leave, including on hangup/interrupt/terminate.
trap 'rm -rf "${WEBTMPDIR}"' EXIT
trap 'exit 1' HUP INT TERM
57 | |||
# Main loop: prompt with the current "directory" + "file", read one line,
# and either handle a keyword or fetch the resulting path.  printf is used
# for the prompts because "echo -n" is not portable.  "read -r" keeps
# backslashes literal, so a lone "\" really arrives as "\" instead of being
# taken as a line continuation.  EOF on stdin ends the loop.
while printf '%s ' "${specD}${specF}" && read -r spec ; do
  case "${spec}" in
  HOST)
    printf 'New host: '
    read -r host
    continue
    ;;
  PORT)
    printf 'New port: '
    read -r port
    continue
    ;;
  SAVE)
    printf 'Save file: '
    read -r saving
    # if we've already got a page, save it
    if test "${saving}" && test -f "${TMPAGE}" ; then
      {
        echo "=== ${host}:${specD}${specF} ==="
        cat "${TMPAGE}"
        echo ''
      } >> "${saving}"
    fi
    continue
    ;;
# Keep a state-concept of "current dir" and "current file".
# Dir is /foo/bar/ ; file is "baz" or null.
# leading slash: create whole new state.
  /*)
    specF=$(printf '%s\n' "${spec}" | sed 's|.*/||')
    specD=$(printf '%s\n' "${spec}" | sed 's|\(.*/\).*|\1|')
    ;;
# embedded slash: adding to the path.  "file" part can be blank, too
  */*)
    specF=$(printf '%s\n' "${spec}" | sed 's|.*/||')
    specD=$(printf '%s\n' "${specD}${spec}" | sed 's|\(.*/\).*|\1|')
    ;;
# dotdot: jump "up" one level and just reprompt [confirms what it did...]
  ..)
    specD=$(printf '%s\n' "${specD}" | sed 's|\(.*/\)..*/|\1|')
    continue
    ;;
# blank line: do nothing, which will re-get the current one
  '')
    ;;
# hack-quoted blank line: "\" means just zero out "file" part.  The doubled
# form is accepted too, for fingers trained on the old non-raw read.
  '\' | '\\')
    specF=''
    ;;
# sigh
  '?')
    echo Help yourself. Read the script fer krissake.
    continue
    ;;
# anything else is taken as a "file" part
  *)
    specF=${spec}
    ;;
  esac

# now put it together and stuff it down a connection.  Some non-unix http
# servers assume they'll never get simple-query format, and wait till an
# extra newline arrives.  If you're up against one of these, change below
# to (printf 'GET %s\n\n' "$spec") | $BACKEND ...
  spec="${specD}${specF}"
  # BACKEND and PAGER stay unquoted so that their options split into words.
  printf 'GET %s\n' "${spec}" | ${BACKEND} "${host}" "${port}" > "${TMPAGE}"
  ${PAGER} "${TMPAGE}"

# save in a format that still shows the URLs we hit after a de-html run
  if test "${saving}" ; then
    {
      echo "=== ${host}:${spec} ==="
      cat "${TMPAGE}"
      echo ''
    } >> "${saving}"
  fi
done
rm -f "${TMPAGE}"
exit 0
134 | |||
135 | ####### | ||
136 | # Encoding notes, finally from RFC 1738: | ||
137 | # %XX -- hex-encode of special chars | ||
138 | # allowed alphas in a URL: $_-.+!*'(), | ||
139 | # relative names *not* described, but obviously used all over the place | ||
140 | # transport://user:pass@host:port/path/name?query-string | ||
141 | # wais: port 210, //host:port/database?search or /database/type/file? | ||
142 | # cgi-bin/script?arg1=foo&arg2=bar&... scripts have to parse xxx&yyy&zzz | ||
143 | # ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords! | ||
144 | # local access-ctl files: ncsa: .htaccess ; cern: .www_acl | ||
145 | ####### | ||
146 | # SEARCH ENGINES: fortunately, all are GET forms or at least work that way... | ||
147 | # multi-word args for most cases: foo+bar | ||
148 | # See 'websearch' for concise results of this research... | ||
diff --git a/src/usr.bin/nc/scripts/webproxy b/src/usr.bin/nc/scripts/webproxy new file mode 100644 index 0000000000..cee2d29fd1 --- /dev/null +++ b/src/usr.bin/nc/scripts/webproxy | |||
@@ -0,0 +1,138 @@ | |||
1 | #! /bin/sh | ||
2 | ## Web proxy, following the grand tradition of Web things being handled by | ||
3 | ## gross scripts. Uses netcat to listen on a high port [default 8000], | ||
4 | ## picks apart requests and sends them on to the right place. Point this | ||
5 | ## at the browser client machine you'll be coming from [to limit access to | ||
6 | ## only it], and point the browser's concept of an HTTP proxy to the | ||
7 | ## machine running this. Takes a single argument of the client that will | ||
8 | ## be using it, and rejects connections from elsewhere. LOGS the queries | ||
9 | ## to a configurable logfile, which can be an interesting read later on! | ||
10 | ## If the argument is "reset", the listener and logfile are cleaned up. | ||
11 | ## | ||
12 | ## This works surprisingly fast and well, for a shell script, although may | ||
13 | ## randomly fail when hammered by a browser that tries to open several | ||
14 | ## connections at once. Drop the "maximum connections" in your browser if | ||
15 | ## this is a problem. | ||
16 | ## | ||
17 | ## A more degenerate case of this, or preferably a small C program that | ||
18 | ## does the same thing under inetd, could handle a small site's worth of | ||
19 | ## proxy queries. Given the way browsers are evolving, proxies like this | ||
20 | ## can play an important role in protecting your own privacy. | ||
21 | ## | ||
22 | ## If you grabbed this in ASCII mode, search down for "eew" and make sure | ||
23 | ## the embedded-CR check is intact, or requests might hang. | ||
24 | ## | ||
25 | ## Doesn't handle POST forms. Who cares, if you're just watching HTTV? | ||
26 | ## Dumbness here has a highly desirable side effect: it only sends the first | ||
27 | ## GET line, since that's all you really ever need to send, and suppresses | ||
28 | ## the other somewhat revealing trash that most browsers insist on sending. | ||
29 | |||
# set these as you wish: proxy port...
PORT=8000
# logfile spec: a real file or /dev/null if you don't care
LFILE="${0}.log"
# optional: where to dump connect info, so you can see if anything went wrong
# CFILE=${0}.conn
# optional extra args to the listener "nc", for instance "-s inside-net-addr"
# XNC=''

# functionality switch has to be done fast, so the next listener can start
# prelaunch check: if no current client and no args, bail.
case "${1}${CLIENT}" in
"")
  echo needs client hostname
  exit 1
  ;;
esac

case "${1}" in
"")
# Make like inetd, and run the next relayer process NOW.  All the redirection
# is necessary so this shell has NO remaining channel open to the net.
# This will hang around for 10 minutes, and exit if no new connections arrive.
# Using -n for speed, avoiding any DNS/port lookups.
# $XNC is left unquoted so that several extra options split into words.
# stderr falls back to /dev/null when CFILE did not come through the
# environment; an empty redirect target would keep the listener from
# starting at all.
  nc -w 600 -n -l -p "$PORT" -e "$0" $XNC "$CLIENT" < /dev/null > /dev/null \
    2> "${CFILE:-/dev/null}" &
  ;;
esac
58 | |||
# no client yet and had an arg, this checking can be much slower now
umask 077

# bracket_addr: read "nc -v" output on stdin and print the text found inside
# [square brackets] on the first line that has any.  Prints nothing when no
# line has brackets, so an error message can never pass for an address.
bracket_addr () {
  sed -n 's/.*\[\(..*\)\].*/\1/p' | sed 1q
}

if test "$1" ; then
# if magic arg, just clean up and then hit our own port to cause server exit
  if test "$1" = "reset" ; then
    rm -f "$LFILE"
    test -f "$CFILE" && rm -f "$CFILE"
    nc -w 1 -n 127.0.0.1 "$PORT" < /dev/null > /dev/null 2>&1
    exit 0
  fi
# the listener re-executes "$0", so make sure it names a real file
  test ! -f "$0" && echo "Oops, cannot find my own corporeal being" && exit 1
# correct launch: set up client access control, passed along thru environment.
  CLIENT="$1"
  export CLIENT
  test "$CFILE" || CFILE=/dev/null
  export CFILE
  touch "$CFILE"
# tell us what happened during the last run, if possible
  if test -f "$CFILE" ; then
    echo "Last connection results:"
    cat "$CFILE"
  fi

# ping client machine and get its bare IP address
  CLIENT=$(nc -z -v -w 8 "$1" 22000 2>&1 | bracket_addr)
  test ! "$CLIENT" && echo "Can't find address of $1" && exit 1

# if this was an initial launch, be informative about it
  echo "=== Launch: $CLIENT" >> "$LFILE"
  echo "Proxy running -- will accept connections on $PORT from $CLIENT"
  echo " Logging queries to $LFILE"
  test -f "$CFILE" && echo " and connection fuckups to $CFILE"

# and run the first listener, showing us output just for the first hit
# ($XNC unquoted on purpose: it may hold several options)
  nc -v -w 600 -n -l -p "$PORT" -e "$0" $XNC "$CLIENT" &
  exit 0
fi
98 | |||
# Fall here to handle a page.  What arrives on stdin looks like:
#   GET type://host.name:80/file/path HTTP/1.0
#   Additional: trash
#   More: trash
#   <newline>

# First line only: method, URL, protocol version, and anything left over.
# -r keeps backslashes in the request literal instead of letting a trailing
# one splice the next header line onto this one.
read -r x1 x2 x3 x4
# printf rather than echo: this is text straight off the network.
printf '%s\n' "=== query: $x1 $x2 $x3 $x4" >> "$LFILE"
test "$x4" && printf '%s\n' "extra junk after request: $x4" && exit 0
# split_hurl URL: nuke questionable characters and split up the request.
# Sets four globals:
#   hurl  the URL minus its "scheme://" prefix and minus shell-special chars
#   hh    host name
#   hp    port, 80 when the URL names none
#   hf    everything from the first slash after the host onward
# Only the prefix up to the FIRST "//" is removed, and the port is looked for
# only in the part before the first "/", so a "//" or ":" further along in
# the path no longer changes which host and port get used.
split_hurl () {
  hurl=$(printf '%s\n' "$1" | \
    sed -e 's+^[^/]*//++' -e 's+[`'\''|$;<>{}\!*()"]++g')
  # echo massaged hurl: $hurl >> $LFILE
  # host[:port] is whatever precedes the first slash
  hauth=$(printf '%s\n' "$hurl" | sed -e 's+/.*++')
  hh=$(printf '%s\n' "$hauth" | sed -e 's+:.*++')
  # prints only when there actually is a colon to strip up to
  hp=$(printf '%s\n' "$hauth" | sed -n -e 's+.*:++p')
  test "$hp" || hp=80
  hf=$(printf '%s\n' "$hurl" | sed -e 's+[^/]*++')
  # echo total split: $hh : $hp : $hf >> $LFILE
}

split_hurl "$x2"
# suck in and log the entire request, because we're curious
# Fails on multipart stuff like forms; oh well...
# Header lines normally end in CR LF, so the "blank" line that closes the
# header reaches read as a lone CR.  Build that CR with printf instead of
# embedding a raw one in the script: a raw CR does not survive ASCII-mode
# transfers, and without this check such requests hang.
cr=$(printf '\r')

# log_rest_of_header: copy header lines from stdin to $LFILE, stopping after
# the first line that is empty or holds nothing but a CR.
log_rest_of_header () {
  while read -r xx ; do
    printf '%s\n' "${xx}" >> "$LFILE"
    test "${xx}" || break
    test "${xx}" = "${cr}" && break
  done
}

# A third word on the request line means more header lines follow.
if test "$x3" ; then
  log_rest_of_header
fi
# check for non-GET *after* we log the query...
test "$x1" != "GET" && echo "sorry, this proxy only does GETs" && exit 0
# A host or port that starts with "-" would be read by nc as an option, and
# both come straight from the client's request, so refuse those outright.
case "$hh" in
-*)
  printf '%s\n' "oops, can't get to $hh : $hp".
  exit 0
  ;;
esac
case "$hp" in
-*)
  printf '%s\n' "oops, can't get to $hh : $hp".
  exit 0
  ;;
esac
# no, you can *not* phone home: refuse any host whose name contains "netscap"
printf '%s\n' "$hh" | grep -F -i netscap > /dev/null && \
  echo "access to Netscam's servers <b>DENIED.</b>" && exit 0
# Do it.  30 sec net-wait time oughta be *plenty*...
# Some servers have forgotten how to handle the simple-query syntax.
# If necessary, replace below with (printf '%s\n\n' "$x1 $hf") | nc...
printf '%s\n' "$x1 $hf" | nc -w 30 "$hh" "$hp" 2> /dev/null || \
  printf '%s\n' "oops, can't get to $hh : $hp".
printf '%s\n' "sent \"$x1 $hf\" to $hh : $hp" >> "$LFILE"
exit 0
138 | |||
diff --git a/src/usr.bin/nc/scripts/webrelay b/src/usr.bin/nc/scripts/webrelay new file mode 100644 index 0000000000..829a8b0708 --- /dev/null +++ b/src/usr.bin/nc/scripts/webrelay | |||
@@ -0,0 +1,44 @@ | |||
1 | #! /bin/sh | ||
2 | ## web relay -- a degenerate version of webproxy, usable with browsers that | ||
3 | ## don't understand proxies. This just forwards connections to a given server. | ||
4 | ## No query logging, no access control [although you can add it to XNC for | ||
5 | ## your own run], and full-URL links will undoubtedly confuse the browser | ||
6 | ## if it can't reach the server directly. This was actually written before | ||
7 | ## the full proxy was, and it shows. | ||
8 | ## The arguments in this case are the destination server and optional port. | ||
9 | ## Please flame pinheads who use self-referential absolute links. | ||
10 | |||
# set these as you wish: proxy port...
PORT=8000
# any extra args to the listening "nc", for instance "-s inside-net-addr"
XNC=''

# functionality switch, which has to be done fast to start the next listener
# With neither an argument nor an RDEST from the environment there is
# nothing to relay to.
case "${1}${RDEST}" in
"")
  echo needs hostname
  exit 1
  ;;
esac

case "${1}" in
"")
# no args: fire off new relayer process NOW.  Will hang around for 10 minutes
# ($XNC is unquoted on purpose so that several options split into words)
  nc -w 600 -l -n -p "$PORT" -e "$0" $XNC < /dev/null > /dev/null 2>&1 &
# and handle this request.  RDEST is known to be set once we get here; the
# port falls back to 80, the same default the setup path uses, in case only
# RDEST was exported.
  exec nc -w 15 "$RDEST" "${RPORT:-80}"
  ;;
esac
32 | |||
# Setup path: reached only when a destination was given on the command
# line.  Speed no longer matters here.
# Destination host is the first argument; the port is the second, or 80
# when that is missing or empty.  Both are exported for the relayers.
RDEST="$1"
RPORT="${2:-80}"
export RDEST RPORT

# Start the first relayer like the relaunch above does, except that stderr is
# left alone so its error messages show up.  -w 600 makes it give up after
# ten minutes without a connection.
nc -v -w 600 -l -p $PORT -e "$0" $XNC < /dev/null > /dev/null &
printf '%s\n' \
  "Relay to ${RDEST}:${RPORT} running -- point your browser here on port $PORT"
exit 0
diff --git a/src/usr.bin/nc/scripts/websearch b/src/usr.bin/nc/scripts/websearch new file mode 100644 index 0000000000..60c3a3356a --- /dev/null +++ b/src/usr.bin/nc/scripts/websearch | |||
@@ -0,0 +1,77 @@ | |||
1 | #! /bin/sh | ||
2 | ## Hit the major search engines. Hose the [large] output to a file! | ||
3 | ## autoconverts multiple arguments into the right format for given servers -- | ||
4 | ## usually worda+wordb, with certain lame exceptions like dejanews. | ||
5 | ## Extracting and post-sorting the URLs is highly recommended... | ||
6 | ## | ||
7 | ## Altavista currently handled by a separate script; may merge at some point. | ||
8 | ## | ||
9 | ## _H* original 950824, updated 951218 and 960209 | ||
10 | |||
# Nothing to search for: complain and quit.  The complaint goes to stderr
# because stdout is normally redirected to a file, where it would go unseen.
if test "${1}" = "" ; then
  echo 'Needs argument[s] to search for!' >&2
  exit 1
fi
# plusjoin WORD...: print all arguments as one "+"-separated string.  Runs of
# whitespace inside or between arguments count as a single separator, and
# leading/trailing whitespace is dropped.  The arguments are never left
# unquoted, so a search term such as "foo*" is not expanded into file names.
plusjoin () {
  printf '%s\n' "$*" | tr -s '[:space:]' ' ' | \
    sed -e 's/^ //' -e 's/ $//' -e 's/ /+/g'
}

# worda+wordb form, which most of the engines want...
PLUSARG=$(plusjoin "$@")
# ...and worda|wordb form for the ones that don't
PIPEARG=$(printf '%s\n' "${PLUSARG}" | sed 's/+/|/g')
# Scratch file for hand-built requests.  mktemp gives an unpredictable name
# created privately, instead of the guessable /tmp/.webq.$$.
IFILE=$(mktemp "${TMPDIR:-/tmp}/webq.XXXXXXXXXX") || exit 1
# Remove it however we leave, including on hangup/interrupt/terminate.
trap 'rm -f "${IFILE}"' EXIT
trap 'exit 1' HUP INT TERM
15 | |||
# Don't have "nc"?  Get "netcat" from avian.org and add it to your toolkit.
#
# doquery PATH HOST PORT
#   Sends the one-line request "GET PATH" to HOST:PORT through nc and lets
#   nc's output (the reply) go to stdout.
#   printf is used instead of echo so that backslashes or a leading dash in
#   the query text are sent exactly as given, whatever /bin/sh happens to be.
doquery () {
  printf 'GET %s\n' "$1" | nc -v -i 1 -w 30 "$2" "$3"
}
20 | |||
# Each engine gets a "=== Name ===" separator line followed by the raw reply.
# Every separator except the first is preceded by an empty line.  Port
# numbers are always given explicitly.

printf '%s\n' "=== Yahoo ==="
doquery "/bin/search?p=${PLUSARG}&n=300&w=w&s=a" search.yahoo.com 80

printf '\n%s\n' "=== Webcrawler ==="
doquery "/cgi-bin/WebQuery?searchText=${PLUSARG}&maxHits=300" webcrawler.com 80

# Infoseek wants "registration" before it will do a real search, and its
# server also insists on the extra newlines WITH CRLF pairs, so it is
# disabled for now.  The basic idea and query formats, for anyone who wants
# to play:
# doquery "WW/IS/Titles?qt=${PLUSARG}" www2.infoseek.com 80
# echo "GET /WW/IS/Titles?qt=${PLUSARG}" > $IFILE
# echo "" >> $IFILE
# nc -v -w 30 guide-p.infoseek.com 80 < $IFILE
printf '\n%s\n' "=== Infoseek ==="
printf '%s\n' " is broken."

# this is kinda flakey; might have to do twice??
printf '\n%s\n' "=== Opentext ==="
doquery "/omw/simplesearch?SearchFor=${PLUSARG}&mode=phrase" \
  search.opentext.com 80

# looks like inktomi will only take hits=100, or defaults back to 30
# the sed drops the rating-dot image lines from the reply
printf '\n%s\n' "=== Inktomi ==="
doquery "/query/?query=${PLUSARG}&hits=100" ink3.cs.berkeley.edu 1234 | \
  sed '/^<IMG ALT.*inktomi.*\.gif">$/d'

# dejanews limits hits to 120 and has a nonstandard format: terms joined
# with "|" and options joined with "+"
printf '\n%s\n' "=== Dejanews ==="
doquery "/cgi-bin/nph-dnquery?query=${PIPEARG}+maxhits=110+format=terse+defaultOp=AND" \
  smithers.dejanews.com 80
55 | |||
# OLD lycos: used to work until they broke it...
# doquery "/cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=1" \
#   query5.lycos.cs.cmu.edu 80
# NEW lycos: wants the User-agent field present in query or it returns nothing
# 960206: webmaster@lycos duly bitched at
# 960208: reply received; here's how we will now handle it:
# the whole request (request line, two header lines, then the empty line
# that ends the header) is written to $IFILE in one go and fed to nc.
cat > "$IFILE" <<EOF
GET /cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=terse&matchmode=and&minscore=.5 HTTP/1.x
User-agent: *FUCK OFF*
Why: go ask todd@pointcom.com (Todd Whitney)

EOF
printf '\n%s\n' "=== Lycos ==="
nc -v -i 1 -w 30 twelve.srv.lycos.com 80 < "$IFILE"

# done with the scratch file
rm -f "$IFILE"
exit 0
73 | |||
74 | # CURRENTLY BROKEN [?] | ||
75 | # infoseek | ||
76 | |||
77 | # some args need to be redone to ensure whatever "and" mode applies | ||