summaryrefslogtreecommitdiff
path: root/src/usr.bin/nc/scripts/web
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr.bin/nc/scripts/web')
-rw-r--r--src/usr.bin/nc/scripts/web148
1 files changed, 148 insertions, 0 deletions
diff --git a/src/usr.bin/nc/scripts/web b/src/usr.bin/nc/scripts/web
new file mode 100644
index 0000000000..382b18e1e3
--- /dev/null
+++ b/src/usr.bin/nc/scripts/web
@@ -0,0 +1,148 @@
#! /bin/sh
## The web sucks. It is a mighty dismal kludge built out of a thousand
## tiny dismal kludges all band-aided together, and now these bottom-line
## clueless pinheads who never heard of "TCP handshake" want to run
## *commerce* over the damn thing. Ye godz. Welcome to TV of the next
## century -- six million channels of worthless shit to choose from, and
## about as much security as today's cable industry!
##
## Having grown mightily tired of pain in the ass browsers, I decided
## to build the minimalist client. It doesn't handle POST, just GETs, but
## the majority of cgi forms handlers apparently ignore the method anyway.
## A distinct advantage is that it *doesn't* pass on any other information
## to the server, like Referer: or info about your local machine such as
## Netscum tries to!
##
## Since the first version, this has become the *almost*-minimalist client,
## but it saves a lot of typing now. And with netcat as its backend, it's
## totally the balls. Don't have netcat? Get it here in /src/hacks!
## _H* 950824, updated 951009 et seq.
##
## args: hostname [port]. You feed it the filename-parts of URLs.
## In the loop, HOST, PORT, and SAVE do the right things; a null line
## gets the previous spec again [useful for initial timeouts]; EOF to exit.
## Relative URLs behave like a "cd" to wherever the last slash appears, or
## just use the last component with the saved preceding "directory" part.
## "\" clears the "filename" part and asks for just the "directory", and
## ".." goes up one "directory" level while retaining the "filename" part.
## Play around; you'll get used to it.
29
# A hostname argument is mandatory; complain on stderr and bail without one.
if test "$1" = "" ; then
  echo "Needs hostname arg." >&2
  exit 1
fi
umask 022

# optional PATH fixup
# PATH=${HOME}:${PATH} ; export PATH

# Pager used to display each fetched page (the caller's ${PAGER}, else
# "more"), and the netcat command line used as the transport.
test "${PAGER}" || PAGER=more
BACKEND="nc -v -w 15"

# Target server: $1 is the host; an optional $2 overrides the default port.
host="$1"
port="80"
if test "$2" != "" ; then
  port="$2"
fi

# URL state: "spec" is the full path last requested, "specD" the current
# "directory" part (always ends in a slash), "specF" the current "file"
# part (may be empty).  "saving" names the save file; empty means off.
spec="/"
specD="/"
specF=''
saving=''

# Scratch space for the fetched page.  A predictable name directly in /tmp
# can be symlink-raced, so make a private directory with mktemp and keep the
# page inside it.  The page file itself is deliberately NOT created here:
# its existence is what tells the SAVE command that a page has been fetched.
TMPAGE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/web.XXXXXXXXXX") || {
  echo "Can't create a temp directory in ${TMPDIR:-/tmp}" >&2
  exit 1
}
TMPAGE="${TMPAGE_DIR}/page"

# Remove the scratch directory on every exit path.  The signal trap turns
# HUP/INT/TERM into an ordinary exit so that the EXIT trap runs for them too.
trap 'rm -rf -- "${TMPAGE_DIR}"' EXIT
trap 'exit 1' HUP INT TERM
57
# Append the page currently held in ${TMPAGE} to the save file, under a
# "=== host:path ===" banner so the URLs we hit still show after a de-html
# run.  Reads host, specD, specF, TMPAGE and saving from the enclosing script.
save_page() {
  {
    printf '%s\n' "=== ${host}:${specD}${specF} ===" &&
      cat "${TMPAGE}" &&
      echo ''
  } >> "${saving}"
}

# get loopy.  The prompt is the current "directory" + "file"; EOF exits.
# Prompts go through printf because "echo -n" is not portable, and input is
# taken with "read -r" so that a lone "\" arrives as typed instead of being
# swallowed as a line continuation.
while printf '%s' "${specD}${specF} " && read -r spec ; do
  case "${spec}" in
    HOST)
      printf '%s' 'New host: '
      # a blank answer keeps the current host rather than wiping it
      read -r answer && test "${answer}" && host="${answer}"
      continue
      ;;
    PORT)
      printf '%s' 'New port: '
      # a blank answer keeps the current port rather than wiping it
      read -r answer && test "${answer}" && port="${answer}"
      continue
      ;;
    SAVE)
      printf '%s' 'Save file: '
      # a blank answer here switches saving off
      read -r saving
      # if we've already got a page, save it
      if test "${saving}" && test -f "${TMPAGE}" ; then
        save_page
      fi
      continue
      ;;
    # Keep a state-concept of "current dir" and "current file".
    # Dir is /foo/bar/ ; file is "baz" or null.
    # leading slash: create whole new state.
    /*)
      specF="${spec##*/}"
      specD="${spec%/*}/"
      ;;
    # embedded slash: adding to the path.  "file" part can be blank, too
    */*)
      specF="${spec##*/}"
      joined="${specD}${spec}"
      specD="${joined%/*}/"
      ;;
    # dotdot: jump "up" one level and just reprompt [confirms what it did...]
    ..)
      parent="${specD%/}"
      specD="${parent%/*}/"
      continue
      ;;
    # blank line: do nothing, which will re-get the current one
    '')
      ;;
    # "\" means just zero out the "file" part.  "\\" is accepted as well,
    # since that is what had to be typed when input was read without -r.
    '\' | '\\')
      specF=''
      ;;
    # sigh
    '?')
      echo 'Help yourself. Read the script fer krissake.'
      continue
      ;;
    # anything else is taken as a "file" part
    *)
      specF="${spec}"
      ;;
  esac

  # now put it together and stuff it down a connection.  Some lame non-unix
  # http servers assume they'll never get simple-query format, and wait till
  # an extra newline arrives.  If you're up against one of these, change
  # below to printf 'GET %s\n\n' "${spec}" | ${BACKEND} ...
  spec="${specD}${specF}"
  # BACKEND and PAGER are left unquoted on purpose: each may carry options
  # that must be split into separate words.
  # shellcheck disable=SC2086
  printf 'GET %s\n' "${spec}" | ${BACKEND} "${host}" "${port}" > "${TMPAGE}"
  # shellcheck disable=SC2086
  ${PAGER} "${TMPAGE}"

  # if a save file is active, log this page to it as well
  if test "${saving}" ; then
    save_page
  fi
done
# Reached on EOF at the prompt: discard the scratch page file and exit
# cleanly.  The path is quoted and guarded with "--" so an unusual temp
# location cannot be word-split or mistaken for an option.
rm -f -- "${TMPAGE}"
exit 0
134
#######
# Encoding notes, finally from RFC 1738:
# %XX -- hex-encode of special chars
# allowed alphas in a URL: $_-.+!*'(),
# relative names *not* described, but obviously used all over the place
# transport://user:pass@host:port/path/name?query-string
# wais: port 210, //host:port/database?search or /database/type/file?
# cgi-bin/script?arg1=foo&arg2=bar&... scripts have to parse xxx&yyy&zzz
# ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords!
# local access-ctl files: ncsa: .htaccess ; cern: .www_acl
#######
# SEARCH ENGINES: fortunately, all are GET forms or at least work that way...
# multi-word args for most cases: foo+bar
# See 'websearch' for concise results of this research...