about summary refs log tree commit diff
path: root/networking/wget.c
diff options
context:
space:
mode:
author Ron Yorston <rmy@pobox.com> 2016-10-19 17:01:55 +0100
committer Ron Yorston <rmy@pobox.com> 2016-10-19 17:01:55 +0100
commit 075814c60a316cfd088c88f26f75ab21b5850b98 (patch)
tree f6e33ac693630827deb309faa5fa4931588db57d /networking/wget.c
parent 977d65c1bbc57f5cdd0c8bfd67c8b5bb1cd390dd (diff)
parent f37e1155aabde6bd95d267a8aec347cedccb8bc3 (diff)
download busybox-w32-075814c60a316cfd088c88f26f75ab21b5850b98.tar.gz
busybox-w32-075814c60a316cfd088c88f26f75ab21b5850b98.tar.bz2
busybox-w32-075814c60a316cfd088c88f26f75ab21b5850b98.zip
Merge branch busybox (up to "ash: comment out free(p) just before...")
Diffstat (limited to 'networking/wget.c')
-rw-r--r-- networking/wget.c 130
1 files changed, 100 insertions, 30 deletions
diff --git a/networking/wget.c b/networking/wget.c
index c886dd391..460b4b833 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -62,9 +62,10 @@
62//config: a helper program to talk over HTTPS. 62//config: a helper program to talk over HTTPS.
63//config: 63//config:
64//config: OpenSSL has a simple SSL client for debug purposes. 64//config: OpenSSL has a simple SSL client for debug purposes.
65//config: If you select "openssl" helper, wget will effectively call 65//config: If you select "openssl" helper, wget will effectively run:
66//config: "openssl s_client -quiet -connect IP:443 2>/dev/null" 66//config: "openssl s_client -quiet -connect hostname:443
67//config: and pipe its data through it. 67//config: -servername hostname 2>/dev/null" and pipe its data
68//config: through it. -servername is not used if hostname is numeric.
68//config: Note inconvenient API: host resolution is done twice, 69//config: Note inconvenient API: host resolution is done twice,
69//config: and there is no guarantee openssl's idea of IPv6 address 70//config: and there is no guarantee openssl's idea of IPv6 address
70//config: format is the same as ours. 71//config: format is the same as ours.
@@ -99,7 +100,7 @@
99 100
100//usage:#define wget_trivial_usage 101//usage:#define wget_trivial_usage
101//usage: IF_FEATURE_WGET_LONG_OPTIONS( 102//usage: IF_FEATURE_WGET_LONG_OPTIONS(
102//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n" 103//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
103//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n" 104//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
104/* Since we ignore these opts, we don't show them in --help */ 105/* Since we ignore these opts, we don't show them in --help */
105/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */ 106/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
@@ -107,21 +108,23 @@
107//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." 108//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
108//usage: ) 109//usage: )
109//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS( 110//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
110//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]" 111//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
111//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." 112//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
112//usage: ) 113//usage: )
113//usage:#define wget_full_usage "\n\n" 114//usage:#define wget_full_usage "\n\n"
114//usage: "Retrieve files via HTTP or FTP\n" 115//usage: "Retrieve files via HTTP or FTP\n"
115//usage: "\n -s Spider mode - only check file existence" 116//usage: IF_FEATURE_WGET_LONG_OPTIONS(
116//usage: "\n -c Continue retrieval of aborted transfer" 117//usage: "\n --spider Spider mode - only check file existence"
117//usage: "\n -q Quiet" 118//usage: )
118//usage: "\n -P DIR Save to DIR (default .)" 119//usage: "\n -c Continue retrieval of aborted transfer"
120//usage: "\n -q Quiet"
121//usage: "\n -P DIR Save to DIR (default .)"
119//usage: IF_FEATURE_WGET_TIMEOUT( 122//usage: IF_FEATURE_WGET_TIMEOUT(
120//usage: "\n -T SEC Network read timeout is SEC seconds" 123//usage: "\n -T SEC Network read timeout is SEC seconds"
121//usage: ) 124//usage: )
122//usage: "\n -O FILE Save to FILE ('-' for stdout)" 125//usage: "\n -O FILE Save to FILE ('-' for stdout)"
123//usage: "\n -U STR Use STR for User-Agent header" 126//usage: "\n -U STR Use STR for User-Agent header"
124//usage: "\n -Y Use proxy ('on' or 'off')" 127//usage: "\n -Y on/off Use proxy"
125 128
126#include "libbb.h" 129#include "libbb.h"
127 130
@@ -228,17 +231,17 @@ struct globals {
228/* Must match option string! */ 231/* Must match option string! */
229enum { 232enum {
230 WGET_OPT_CONTINUE = (1 << 0), 233 WGET_OPT_CONTINUE = (1 << 0),
231 WGET_OPT_SPIDER = (1 << 1), 234 WGET_OPT_QUIET = (1 << 1),
232 WGET_OPT_QUIET = (1 << 2), 235 WGET_OPT_OUTNAME = (1 << 2),
233 WGET_OPT_OUTNAME = (1 << 3), 236 WGET_OPT_PREFIX = (1 << 3),
234 WGET_OPT_PREFIX = (1 << 4), 237 WGET_OPT_PROXY = (1 << 4),
235 WGET_OPT_PROXY = (1 << 5), 238 WGET_OPT_USER_AGENT = (1 << 5),
236 WGET_OPT_USER_AGENT = (1 << 6), 239 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 6),
237 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7), 240 WGET_OPT_RETRIES = (1 << 7),
238 WGET_OPT_RETRIES = (1 << 8), 241 WGET_OPT_nsomething = (1 << 8),
239 WGET_OPT_PASSIVE = (1 << 9), 242 WGET_OPT_HEADER = (1 << 9) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
240 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS, 243 WGET_OPT_POST_DATA = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
241 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, 244 WGET_OPT_SPIDER = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
242}; 245};
243 246
244enum { 247enum {
@@ -349,6 +352,30 @@ static void set_alarm(void)
349# define clear_alarm() ((void)0) 352# define clear_alarm() ((void)0)
350#endif 353#endif
351 354
355#if ENABLE_FEATURE_WGET_OPENSSL
356/*
357 * is_ip_address() attempts to verify whether or not a string
358 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
359 * of inet_pton() can be used to determine this.
360 *
361 * TODO add proper error checking when inet_pton() returns -1
362 * (some form of system error has occurred, and errno is set)
363 */
364static int is_ip_address(const char *string)
365{
366 struct sockaddr_in sa;
367
368 int result = inet_pton(AF_INET, string, &(sa.sin_addr));
369# if ENABLE_FEATURE_IPV6
370 if (result == 0) {
371 struct sockaddr_in6 sa6;
372 result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
373 }
374# endif
375 return (result == 1);
376}
377#endif
378
352static FILE *open_socket(len_and_sockaddr *lsa) 379static FILE *open_socket(len_and_sockaddr *lsa)
353{ 380{
354 int fd; 381 int fd;
@@ -635,6 +662,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
635static int spawn_https_helper_openssl(const char *host, unsigned port) 662static int spawn_https_helper_openssl(const char *host, unsigned port)
636{ 663{
637 char *allocated = NULL; 664 char *allocated = NULL;
665 char *servername;
638 int sp[2]; 666 int sp[2];
639 int pid; 667 int pid;
640 IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;) 668 IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)
@@ -645,12 +673,14 @@ static int spawn_https_helper_openssl(const char *host, unsigned port)
645 673
646 if (!strchr(host, ':')) 674 if (!strchr(host, ':'))
647 host = allocated = xasprintf("%s:%u", host, port); 675 host = allocated = xasprintf("%s:%u", host, port);
676 servername = xstrdup(host);
677 strrchr(servername, ':')[0] = '\0';
648 678
649 fflush_all(); 679 fflush_all();
650 pid = xvfork(); 680 pid = xvfork();
651 if (pid == 0) { 681 if (pid == 0) {
652 /* Child */ 682 /* Child */
653 char *argv[6]; 683 char *argv[8];
654 684
655 close(sp[0]); 685 close(sp[0]);
656 xmove_fd(sp[1], 0); 686 xmove_fd(sp[1], 0);
@@ -662,12 +692,22 @@ static int spawn_https_helper_openssl(const char *host, unsigned port)
662 */ 692 */
663 xmove_fd(2, 3); 693 xmove_fd(2, 3);
664 xopen("/dev/null", O_RDWR); 694 xopen("/dev/null", O_RDWR);
695 memset(&argv, 0, sizeof(argv));
665 argv[0] = (char*)"openssl"; 696 argv[0] = (char*)"openssl";
666 argv[1] = (char*)"s_client"; 697 argv[1] = (char*)"s_client";
667 argv[2] = (char*)"-quiet"; 698 argv[2] = (char*)"-quiet";
668 argv[3] = (char*)"-connect"; 699 argv[3] = (char*)"-connect";
669 argv[4] = (char*)host; 700 argv[4] = (char*)host;
670 argv[5] = NULL; 701 /*
702 * Per RFC 6066 Section 3, the only permitted values in the
703 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
704 * IPv4 and IPv6 addresses, port numbers are not allowed.
705 */
706 if (!is_ip_address(servername)) {
707 argv[5] = (char*)"-servername";
708 argv[6] = (char*)servername;
709 }
710
671 BB_EXECVP(argv[0], argv); 711 BB_EXECVP(argv[0], argv);
672 xmove_fd(3, 2); 712 xmove_fd(3, 2);
673# if ENABLE_FEATURE_WGET_SSL_HELPER 713# if ENABLE_FEATURE_WGET_SSL_HELPER
@@ -680,6 +720,7 @@ static int spawn_https_helper_openssl(const char *host, unsigned port)
680 } 720 }
681 721
682 /* Parent */ 722 /* Parent */
723 free(servername);
683 free(allocated); 724 free(allocated);
684 close(sp[1]); 725 close(sp[1]);
685# if ENABLE_FEATURE_WGET_SSL_HELPER 726# if ENABLE_FEATURE_WGET_SSL_HELPER
@@ -1058,6 +1099,12 @@ static void download_one_url(const char *url)
1058 } 1099 }
1059 1100
1060 fflush(sfp); 1101 fflush(sfp);
1102 /* If we use SSL helper, keeping our end of the socket open for writing
1103 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1104 * even after child closes its copy of the fd.
1105 * This helps:
1106 */
1107 shutdown(fileno(sfp), SHUT_WR);
1061 1108
1062 /* 1109 /*
1063 * Retrieve HTTP response line and check for "200" status code. 1110 * Retrieve HTTP response line and check for "200" status code.
@@ -1077,7 +1124,21 @@ static void download_one_url(const char *url)
1077 while (gethdr(sfp) != NULL) 1124 while (gethdr(sfp) != NULL)
1078 /* eat all remaining headers */; 1125 /* eat all remaining headers */;
1079 goto read_response; 1126 goto read_response;
1127
1128 /* Success responses */
1080 case 200: 1129 case 200:
1130 /* fall through */
1131 case 201: /* 201 Created */
1132/* "The request has been fulfilled and resulted in a new resource being created" */
1133 /* Standard wget is reported to treat this as success */
1134 /* fall through */
1135 case 202: /* 202 Accepted */
1136/* "The request has been accepted for processing, but the processing has not been completed" */
1137 /* Treat as success: fall through */
1138 case 203: /* 203 Non-Authoritative Information */
1139/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1140 /* fall through */
1141 case 204: /* 204 No Content */
1081/* 1142/*
1082Response 204 doesn't say "null file", it says "metadata 1143Response 204 doesn't say "null file", it says "metadata
1083has changed but data didn't": 1144has changed but data didn't":
@@ -1102,7 +1163,6 @@ is always terminated by the first empty line after the header fields."
1102However, in real world it was observed that some web servers 1163However, in real world it was observed that some web servers
1103(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. 1164(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1104*/ 1165*/
1105 case 204:
1106 if (G.beg_range != 0) { 1166 if (G.beg_range != 0) {
1107 /* "Range:..." was not honored by the server. 1167 /* "Range:..." was not honored by the server.
1108 * Restart download from the beginning. 1168 * Restart download from the beginning.
@@ -1110,11 +1170,14 @@ However, in real world it was observed that some web servers
1110 reset_beg_range_to_zero(); 1170 reset_beg_range_to_zero();
1111 } 1171 }
1112 break; 1172 break;
1173 /* 205 Reset Content ?? what to do on this ?? */
1174
1113 case 300: /* redirection */ 1175 case 300: /* redirection */
1114 case 301: 1176 case 301:
1115 case 302: 1177 case 302:
1116 case 303: 1178 case 303:
1117 break; 1179 break;
1180
1118 case 206: /* Partial Content */ 1181 case 206: /* Partial Content */
1119 if (G.beg_range != 0) 1182 if (G.beg_range != 0)
1120 /* "Range:..." worked. Good. */ 1183 /* "Range:..." worked. Good. */
@@ -1231,8 +1294,6 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
1231 static const char wget_longopts[] ALIGN1 = 1294 static const char wget_longopts[] ALIGN1 =
1232 /* name, has_arg, val */ 1295 /* name, has_arg, val */
1233 "continue\0" No_argument "c" 1296 "continue\0" No_argument "c"
1234//FIXME: -s isn't --spider, it's --save-headers!
1235 "spider\0" No_argument "s"
1236 "quiet\0" No_argument "q" 1297 "quiet\0" No_argument "q"
1237 "output-document\0" Required_argument "O" 1298 "output-document\0" Required_argument "O"
1238 "directory-prefix\0" Required_argument "P" 1299 "directory-prefix\0" Required_argument "P"
@@ -1244,6 +1305,7 @@ IF_FEATURE_WGET_TIMEOUT(
1244IF_DESKTOP( "tries\0" Required_argument "t") 1305IF_DESKTOP( "tries\0" Required_argument "t")
1245 "header\0" Required_argument "\xff" 1306 "header\0" Required_argument "\xff"
1246 "post-data\0" Required_argument "\xfe" 1307 "post-data\0" Required_argument "\xfe"
1308 "spider\0" No_argument "\xfd"
1247 /* Ignored (we always use PASV): */ 1309 /* Ignored (we always use PASV): */
1248IF_DESKTOP( "passive-ftp\0" No_argument "\xf0") 1310IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1249 /* Ignored (we don't do ssl) */ 1311 /* Ignored (we don't do ssl) */
@@ -1275,7 +1337,7 @@ IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1275#endif 1337#endif
1276 opt_complementary = "-1" /* at least one URL */ 1338 opt_complementary = "-1" /* at least one URL */
1277 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */ 1339 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
1278 getopt32(argv, "csqO:P:Y:U:T:+" 1340 getopt32(argv, "cqO:P:Y:U:T:+"
1279 /*ignored:*/ "t:" 1341 /*ignored:*/ "t:"
1280 /*ignored:*/ "n::" 1342 /*ignored:*/ "n::"
1281 /* wget has exactly four -n<letter> opts, all of which we can ignore: 1343 /* wget has exactly four -n<letter> opts, all of which we can ignore:
@@ -1294,6 +1356,14 @@ IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1294 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist) 1356 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1295 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data) 1357 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1296 ); 1358 );
1359#if 0 /* option bits debug */
1360 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1361 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1362 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1363 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1364 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1365 exit(0);
1366#endif
1297 argv += optind; 1367 argv += optind;
1298 1368
1299#if ENABLE_FEATURE_WGET_LONG_OPTIONS 1369#if ENABLE_FEATURE_WGET_LONG_OPTIONS