diff options
Diffstat (limited to 'src/usr.bin/nc/scripts/web')
| -rw-r--r-- | src/usr.bin/nc/scripts/web | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/src/usr.bin/nc/scripts/web b/src/usr.bin/nc/scripts/web new file mode 100644 index 0000000000..382b18e1e3 --- /dev/null +++ b/src/usr.bin/nc/scripts/web | |||
| @@ -0,0 +1,148 @@ | |||
| 1 | #! /bin/sh | ||
| 2 | ## The web sucks. It is a mighty dismal kludge built out of a thousand | ||
| 3 | ## tiny dismal kludges all band-aided together, and now these bottom-line | ||
| 4 | ## clueless pinheads who never heard of "TCP handshake" want to run | ||
| 5 | ## *commerce* over the damn thing. Ye godz. Welcome to TV of the next | ||
| 6 | ## century -- six million channels of worthless shit to choose from, and | ||
| 7 | ## about as much security as today's cable industry! | ||
| 8 | ## | ||
| 9 | ## Having grown mightily tired of pain in the ass browsers, I decided | ||
| 10 | ## to build the minimalist client. It doesn't handle POST, just GETs, but | ||
| 11 | ## the majority of cgi forms handlers apparently ignore the method anyway. | ||
| 12 | ## A distinct advantage is that it *doesn't* pass on any other information | ||
| 13 | ## to the server, like Referer: or info about your local machine such as | ||
| 14 | ## Netscum tries to! | ||
| 15 | ## | ||
| 16 | ## Since the first version, this has become the *almost*-minimalist client, | ||
| 17 | ## but it saves a lot of typing now. And with netcat as its backend, it's | ||
| 18 | ## totally the balls. Don't have netcat? Get it here in /src/hacks! | ||
| 19 | ## _H* 950824, updated 951009 et seq. | ||
| 20 | ## | ||
| 21 | ## args: hostname [port]. You feed it the filename-parts of URLs. | ||
| 22 | ## In the loop, HOST, PORT, and SAVE do the right things; a null line | ||
| 23 | ## gets the previous spec again [useful for initial timeouts]; EOF to exit. | ||
| 24 | ## Relative URLs behave like a "cd" to wherever the last slash appears, or | ||
| 25 | ## just use the last component with the saved preceding "directory" part. | ||
| 26 | ## "\" clears the "filename" part and asks for just the "directory", and | ||
| 27 | ## ".." goes up one "directory" level while retaining the "filename" part. | ||
| 28 | ## Play around; you'll get used to it. | ||
| 29 | |||
# A hostname argument is mandatory (the port is optional).  Complain on
# stderr, so the message is not lost if stdout is redirected, and bail out.
if test -z "${1:-}" ; then
  echo "Needs hostname arg." >&2
  echo "usage: ${0##*/} hostname [port]" >&2
  exit 1
fi
umask 022

# optional PATH fixup
# PATH=${HOME}:${PATH} ; export PATH

# Pager used to show each fetched page: honor $PAGER, else fall back to more.
: "${PAGER:=more}"
# Command line (program plus options) that every request is piped into.
BACKEND="nc -v -w 15"

# Target server: first arg is the host, optional second arg is the port.
host="$1"
port="${2:-80}"

# Browsing state: spec is the full path last requested, specD the current
# "directory" (always ends in a slash), specF the current "file" (may be
# empty).  saving is the name of the log file pages get appended to, if any.
spec="/"
specD="/"
specF=''
saving=''

# Scratch space.  Instead of a guessable /tmp/web$$ name (open to symlink
# races), let mktemp create a private directory and keep the page file
# inside it.  The page file itself is deliberately NOT created here: later
# code uses its existence to tell whether a page has been fetched yet.
# The template stays under /tmp so the resulting path never contains blanks.
WEBTMPDIR=$(mktemp -d /tmp/web.XXXXXXXXXX) || {
  echo "Can't create a temp directory under /tmp" >&2
  exit 1
}
TMPAGE="${WEBTMPDIR}/page"

# Remove the scratch directory on every exit path.  The signal traps turn a
# hangup/interrupt/terminate into a normal exit so the EXIT trap also runs
# in shells that do not fire it on signals.
trap 'rm -rf -- "${WEBTMPDIR}"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
| 57 | |||
# web_resolve_spec SPEC
# Fold one line of user input into the browsing state: specD is the current
# "directory" (/foo/bar/), specF the current "file" ("baz" or empty).
web_resolve_spec() {
  case "$1" in
    # leading slash: absolute path, replaces the whole state
    /*)
      specF=${1##*/}
      specD=${1%/*}/
      ;;
    # embedded slash: descend relative to the current dir; the "file" part
    # can be blank, too
    */*)
      specF=${1##*/}
      specD=${specD}${1%/*}/
      ;;
    # blank line: leave the state alone, which re-gets the current page
    '')
      ;;
    # lone backslash: keep the dir, zero out the "file" part
    '\')
      specF=''
      ;;
    # anything else is taken as a "file" part
    *)
      specF=$1
      ;;
  esac
}

# web_parent_dir DIR
# Print DIR with its last component removed ("/foo/bar/" -> "/foo/").
# A DIR with nothing left to strip (e.g. "/") is printed unchanged.
web_parent_dir() {
  printf '%s\n' "$1" | sed 's|\(.*/\)..*/|\1|'
}

# web_save_page LABEL
# Append the fetched page in ${TMPAGE} to the ${saving} file, preceded by a
# "=== LABEL ===" banner and followed by a blank line, so the URLs we hit
# are still visible after a de-html run.
web_save_page() {
  printf '=== %s ===\n' "$1" >> "${saving}" &&
    cat "${TMPAGE}" >> "${saving}" &&
    printf '\n' >> "${saving}"
}

# get loopy.  Prompt with the current dir+file and read one command or path
# per line; EOF ends the loop.  printf is used for the prompts because
# "echo -n" is not portable, and "read -r" keeps a typed backslash literal,
# which the "\" command depends on.
while printf '%s ' "${specD}${specF}" && read -r spec ; do
  case "${spec}" in
    HOST)
      printf 'New host: '
      read -r host
      continue
      ;;
    PORT)
      printf 'New port: '
      read -r port
      continue
      ;;
    SAVE)
      printf 'Save file: '
      read -r saving
      # if we've already got a page, save it
      if test -n "${saving}" && test -f "${TMPAGE}" ; then
        web_save_page "${host}:${specD}${specF}"
      fi
      continue
      ;;
    # dotdot: jump "up" one level and just reprompt [confirms what it did...]
    ..)
      specD=$(web_parent_dir "${specD}")
      continue
      ;;
    # sigh
    '?')
      printf '%s\n' 'Help yourself. Read the script fer krissake.'
      continue
      ;;
    # everything else names something to fetch
    *)
      web_resolve_spec "${spec}"
      ;;
  esac

  # now put it together and stuff it down a connection.  Some lame non-unix
  # http servers assume they'll never get simple-query format, and wait till
  # an extra newline arrives.  If you're up against one of these, change
  # the format below to 'GET %s\n\n'.
  spec="${specD}${specF}"
  # BACKEND and PAGER are left unquoted on purpose: each holds a command
  # name plus options and has to be split into words.
  # shellcheck disable=SC2086
  printf 'GET %s\n' "${spec}" | ${BACKEND} "${host}" "${port}" > "${TMPAGE}"
  # shellcheck disable=SC2086
  ${PAGER} "${TMPAGE}"

  # log the page too if a save file has been chosen
  if test -n "${saving}" ; then
    web_save_page "${host}:${spec}"
  fi
done
# Normal exit (EOF at the prompt): drop the scratch page file.  The name is
# quoted so an odd path can never be split into several rm arguments.
rm -f -- "${TMPAGE}"
exit 0
| 134 | |||
| 135 | ####### | ||
| 136 | # Encoding notes, finally from RFC 1738: | ||
| 137 | # %XX -- hex-encode of special chars | ||
| 138 | # allowed alphas in a URL: $_-.+!*'(), | ||
| 139 | # relative names *not* described, but obviously used all over the place | ||
| 140 | # transport://user:pass@host:port/path/name?query-string | ||
| 141 | # wais: port 210, //host:port/database?search or /database/type/file? | ||
| 142 | # cgi-bin/script?arg1=foo&arg2=bar&... scripts have to parse xxx&yyy&zzz | ||
| 143 | # ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords! | ||
| 144 | # local access-ctl files: ncsa: .htaccess ; cern: .www_acl | ||
| 145 | ####### | ||
| 146 | # SEARCH ENGINES: fortunately, all are GET forms or at least work that way... | ||
| 147 | # multi-word args for most cases: foo+bar | ||
| 148 | # See 'websearch' for concise results of this research... | ||
