diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2018-02-12 16:46:13 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2018-02-12 16:46:13 +0100 |
commit | 3459024bf404af814cacfe90a0deb719e282ae62 (patch) | |
tree | 01d11fb49c9a18b2d8b48fb646aa535dcb063d04 /networking/wget.c | |
parent | ecaec1dbecda67e5b740694878c0780eb4347d26 (diff) | |
download | busybox-w32-3459024bf404af814cacfe90a0deb719e282ae62.tar.gz busybox-w32-3459024bf404af814cacfe90a0deb719e282ae62.tar.bz2 busybox-w32-3459024bf404af814cacfe90a0deb719e282ae62.zip |
wget: more thorough sanitization of other side's data
function old new delta
get_sanitized_hdr - 156 +156
fgets_trim_sanitize - 128 +128
ftpcmd 129 133 +4
parse_url 461 454 -7
sanitize_string 14 - -14
wget_main 2431 2381 -50
fgets_and_trim 119 - -119
gethdr 163 - -163
------------------------------------------------------------------------------
(add/remove: 2/3 grow/shrink: 1/2 up/down: 288/-353) Total: -65 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'networking/wget.c')
-rw-r--r-- | networking/wget.c | 86 |
1 files changed, 55 insertions, 31 deletions
diff --git a/networking/wget.c b/networking/wget.c index 95b88ad37..3a5d68173 100644 --- a/networking/wget.c +++ b/networking/wget.c | |||
@@ -352,15 +352,6 @@ static char *base64enc(const char *str) | |||
352 | } | 352 | } |
353 | #endif | 353 | #endif |
354 | 354 | ||
355 | static char* sanitize_string(char *s) | ||
356 | { | ||
357 | unsigned char *p = (void *) s; | ||
358 | while (*p >= ' ') | ||
359 | p++; | ||
360 | *p = '\0'; | ||
361 | return s; | ||
362 | } | ||
363 | |||
364 | #if ENABLE_FEATURE_WGET_TIMEOUT | 355 | #if ENABLE_FEATURE_WGET_TIMEOUT |
365 | static void alarm_handler(int sig UNUSED_PARAM) | 356 | static void alarm_handler(int sig UNUSED_PARAM) |
366 | { | 357 | { |
@@ -423,22 +414,49 @@ static FILE *open_socket(len_and_sockaddr *lsa) | |||
423 | return fp; | 414 | return fp; |
424 | } | 415 | } |
425 | 416 | ||
417 | /* We balk at any control chars in other side's messages. | ||
418 | * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs | ||
419 | * and error messages. | ||
420 | * | ||
421 | * The only exception is tabs, which are converted to (one) space: | ||
422 | * HTTP's "headers: <whitespace> values" may have those. | ||
423 | */ | ||
424 | static char* sanitize_string(char *s) | ||
425 | { | ||
426 | unsigned char *p = (void *) s; | ||
427 | while (*p) { | ||
428 | if (*p < ' ') { | ||
429 | if (*p != '\t') | ||
430 | break; | ||
431 | *p = ' '; | ||
432 | } | ||
433 | p++; | ||
434 | } | ||
435 | *p = '\0'; | ||
436 | return s; | ||
437 | } | ||
438 | |||
426 | /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */ | 439 | /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */ |
427 | static char fgets_and_trim(FILE *fp, const char *fmt) | 440 | static char fgets_trim_sanitize(FILE *fp, const char *fmt) |
428 | { | 441 | { |
429 | char c; | 442 | char c; |
430 | char *buf_ptr; | 443 | char *buf_ptr; |
431 | 444 | ||
432 | set_alarm(); | 445 | set_alarm(); |
433 | if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL) | 446 | if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL) |
434 | bb_perror_msg_and_die("error getting response"); | 447 | bb_perror_msg_and_die("error getting response"); |
435 | clear_alarm(); | 448 | clear_alarm(); |
436 | 449 | ||
437 | buf_ptr = strchrnul(G.wget_buf, '\n'); | 450 | buf_ptr = strchrnul(G.wget_buf, '\n'); |
438 | c = *buf_ptr; | 451 | c = *buf_ptr; |
452 | #if 1 | ||
453 | /* Disallow any control chars: trim at first char < 0x20 */ | ||
454 | sanitize_string(G.wget_buf); | ||
455 | #else | ||
439 | *buf_ptr = '\0'; | 456 | *buf_ptr = '\0'; |
440 | buf_ptr = strchrnul(G.wget_buf, '\r'); | 457 | buf_ptr = strchrnul(G.wget_buf, '\r'); |
441 | *buf_ptr = '\0'; | 458 | *buf_ptr = '\0'; |
459 | #endif | ||
442 | 460 | ||
443 | log_io("< %s", G.wget_buf); | 461 | log_io("< %s", G.wget_buf); |
444 | 462 | ||
@@ -462,8 +480,10 @@ static int ftpcmd(const char *s1, const char *s2, FILE *fp) | |||
462 | log_io("> %s%s", s1, s2); | 480 | log_io("> %s%s", s1, s2); |
463 | } | 481 | } |
464 | 482 | ||
483 | /* Read until "Nxx something" is received */ | ||
484 | G.wget_buf[3] = 0; | ||
465 | do { | 485 | do { |
466 | fgets_and_trim(fp, "%s\n"); | 486 | fgets_trim_sanitize(fp, "%s\n"); |
467 | } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' '); | 487 | } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' '); |
468 | 488 | ||
469 | G.wget_buf[3] = '\0'; | 489 | G.wget_buf[3] = '\0'; |
@@ -505,7 +525,7 @@ static void parse_url(const char *src_url, struct host_info *h) | |||
505 | h->protocol = P_HTTP; | 525 | h->protocol = P_HTTP; |
506 | } else { | 526 | } else { |
507 | *p = ':'; | 527 | *p = ':'; |
508 | bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url)); | 528 | bb_error_msg_and_die("not an http or ftp url: %s", url); |
509 | } | 529 | } |
510 | } else { | 530 | } else { |
511 | // GNU wget is user-friendly and falls back to http:// | 531 | // GNU wget is user-friendly and falls back to http:// |
@@ -560,13 +580,13 @@ static void parse_url(const char *src_url, struct host_info *h) | |||
560 | */ | 580 | */ |
561 | } | 581 | } |
562 | 582 | ||
563 | static char *gethdr(FILE *fp) | 583 | static char *get_sanitized_hdr(FILE *fp) |
564 | { | 584 | { |
565 | char *s, *hdrval; | 585 | char *s, *hdrval; |
566 | int c; | 586 | int c; |
567 | 587 | ||
568 | /* retrieve header line */ | 588 | /* retrieve header line */ |
569 | c = fgets_and_trim(fp, " %s\n"); | 589 | c = fgets_trim_sanitize(fp, " %s\n"); |
570 | 590 | ||
571 | /* end of the headers? */ | 591 | /* end of the headers? */ |
572 | if (G.wget_buf[0] == '\0') | 592 | if (G.wget_buf[0] == '\0') |
@@ -588,7 +608,7 @@ static char *gethdr(FILE *fp) | |||
588 | 608 | ||
589 | /* verify we are at the end of the header name */ | 609 | /* verify we are at the end of the header name */ |
590 | if (*s != ':') | 610 | if (*s != ':') |
591 | bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf)); | 611 | bb_error_msg_and_die("bad header line: %s", G.wget_buf); |
592 | 612 | ||
593 | /* locate the start of the header value */ | 613 | /* locate the start of the header value */ |
594 | *s++ = '\0'; | 614 | *s++ = '\0'; |
@@ -755,7 +775,8 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ | |||
755 | #endif | 775 | #endif |
756 | 776 | ||
757 | if (ftpcmd(NULL, NULL, sfp) != 220) | 777 | if (ftpcmd(NULL, NULL, sfp) != 220) |
758 | bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4)); | 778 | bb_error_msg_and_die("%s", G.wget_buf); |
779 | /* note: ftpcmd() sanitizes G.wget_buf, ok to print */ | ||
759 | 780 | ||
760 | /* | 781 | /* |
761 | * Splitting username:password pair, | 782 | * Splitting username:password pair, |
@@ -772,7 +793,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ | |||
772 | break; | 793 | break; |
773 | /* fall through (failed login) */ | 794 | /* fall through (failed login) */ |
774 | default: | 795 | default: |
775 | bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4)); | 796 | bb_error_msg_and_die("ftp login: %s", G.wget_buf); |
776 | } | 797 | } |
777 | 798 | ||
778 | ftpcmd("TYPE I", NULL, sfp); | 799 | ftpcmd("TYPE I", NULL, sfp); |
@@ -796,7 +817,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ | |||
796 | } else | 817 | } else |
797 | if (ftpcmd("PASV", NULL, sfp) != 227) { | 818 | if (ftpcmd("PASV", NULL, sfp) != 227) { |
798 | pasv_error: | 819 | pasv_error: |
799 | bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf)); | 820 | bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf); |
800 | } | 821 | } |
801 | port = parse_pasv_epsv(G.wget_buf); | 822 | port = parse_pasv_epsv(G.wget_buf); |
802 | if (port < 0) | 823 | if (port < 0) |
@@ -824,8 +845,11 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ | |||
824 | reset_beg_range_to_zero(); | 845 | reset_beg_range_to_zero(); |
825 | } | 846 | } |
826 | 847 | ||
848 | //TODO: needs ftp-escaping 0xff and '\n' bytes here. | ||
849 | //Or disallow '\n' altogether via sanitize_string() in parse_url(). | ||
850 | //But 0xff's are possible in valid utf8 filenames. | ||
827 | if (ftpcmd("RETR ", target->path, sfp) > 150) | 851 | if (ftpcmd("RETR ", target->path, sfp) > 150) |
828 | bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf)); | 852 | bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf); |
829 | 853 | ||
830 | return sfp; | 854 | return sfp; |
831 | } | 855 | } |
@@ -946,9 +970,9 @@ static void NOINLINE retrieve_file_data(FILE *dfp) | |||
946 | if (!G.chunked) | 970 | if (!G.chunked) |
947 | break; | 971 | break; |
948 | 972 | ||
949 | fgets_and_trim(dfp, NULL); /* Eat empty line */ | 973 | fgets_trim_sanitize(dfp, NULL); /* Eat empty line */ |
950 | get_clen: | 974 | get_clen: |
951 | fgets_and_trim(dfp, NULL); | 975 | fgets_trim_sanitize(dfp, NULL); |
952 | G.content_len = STRTOOFF(G.wget_buf, NULL, 16); | 976 | G.content_len = STRTOOFF(G.wget_buf, NULL, 16); |
953 | /* FIXME: error check? */ | 977 | /* FIXME: error check? */ |
954 | if (G.content_len == 0) | 978 | if (G.content_len == 0) |
@@ -1178,7 +1202,7 @@ static void download_one_url(const char *url) | |||
1178 | * Retrieve HTTP response line and check for "200" status code. | 1202 | * Retrieve HTTP response line and check for "200" status code. |
1179 | */ | 1203 | */ |
1180 | read_response: | 1204 | read_response: |
1181 | fgets_and_trim(sfp, " %s\n"); | 1205 | fgets_trim_sanitize(sfp, " %s\n"); |
1182 | 1206 | ||
1183 | str = G.wget_buf; | 1207 | str = G.wget_buf; |
1184 | str = skip_non_whitespace(str); | 1208 | str = skip_non_whitespace(str); |
@@ -1189,7 +1213,7 @@ static void download_one_url(const char *url) | |||
1189 | switch (status) { | 1213 | switch (status) { |
1190 | case 0: | 1214 | case 0: |
1191 | case 100: | 1215 | case 100: |
1192 | while (gethdr(sfp) != NULL) | 1216 | while (get_sanitized_hdr(sfp) != NULL) |
1193 | /* eat all remaining headers */; | 1217 | /* eat all remaining headers */; |
1194 | goto read_response; | 1218 | goto read_response; |
1195 | 1219 | ||
@@ -1253,13 +1277,13 @@ However, in real world it was observed that some web servers | |||
1253 | /* Partial Content even though we did not ask for it??? */ | 1277 | /* Partial Content even though we did not ask for it??? */ |
1254 | /* fall through */ | 1278 | /* fall through */ |
1255 | default: | 1279 | default: |
1256 | bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf)); | 1280 | bb_error_msg_and_die("server returned error: %s", G.wget_buf); |
1257 | } | 1281 | } |
1258 | 1282 | ||
1259 | /* | 1283 | /* |
1260 | * Retrieve HTTP headers. | 1284 | * Retrieve HTTP headers. |
1261 | */ | 1285 | */ |
1262 | while ((str = gethdr(sfp)) != NULL) { | 1286 | while ((str = get_sanitized_hdr(sfp)) != NULL) { |
1263 | static const char keywords[] ALIGN1 = | 1287 | static const char keywords[] ALIGN1 = |
1264 | "content-length\0""transfer-encoding\0""location\0"; | 1288 | "content-length\0""transfer-encoding\0""location\0"; |
1265 | enum { | 1289 | enum { |
@@ -1267,7 +1291,7 @@ However, in real world it was observed that some web servers | |||
1267 | }; | 1291 | }; |
1268 | smalluint key; | 1292 | smalluint key; |
1269 | 1293 | ||
1270 | /* gethdr converted "FOO:" string to lowercase */ | 1294 | /* get_sanitized_hdr converted "FOO:" string to lowercase */ |
1271 | 1295 | ||
1272 | /* strip trailing whitespace */ | 1296 | /* strip trailing whitespace */ |
1273 | char *s = strchrnul(str, '\0') - 1; | 1297 | char *s = strchrnul(str, '\0') - 1; |
@@ -1279,14 +1303,14 @@ However, in real world it was observed that some web servers | |||
1279 | if (key == KEY_content_length) { | 1303 | if (key == KEY_content_length) { |
1280 | G.content_len = BB_STRTOOFF(str, NULL, 10); | 1304 | G.content_len = BB_STRTOOFF(str, NULL, 10); |
1281 | if (G.content_len < 0 || errno) { | 1305 | if (G.content_len < 0 || errno) { |
1282 | bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str)); | 1306 | bb_error_msg_and_die("content-length %s is garbage", str); |
1283 | } | 1307 | } |
1284 | G.got_clen = 1; | 1308 | G.got_clen = 1; |
1285 | continue; | 1309 | continue; |
1286 | } | 1310 | } |
1287 | if (key == KEY_transfer_encoding) { | 1311 | if (key == KEY_transfer_encoding) { |
1288 | if (strcmp(str_tolower(str), "chunked") != 0) | 1312 | if (strcmp(str_tolower(str), "chunked") != 0) |
1289 | bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str)); | 1313 | bb_error_msg_and_die("transfer encoding '%s' is not supported", str); |
1290 | G.chunked = 1; | 1314 | G.chunked = 1; |
1291 | } | 1315 | } |
1292 | if (key == KEY_location && status >= 300) { | 1316 | if (key == KEY_location && status >= 300) { |
@@ -1295,7 +1319,7 @@ However, in real world it was observed that some web servers | |||
1295 | fclose(sfp); | 1319 | fclose(sfp); |
1296 | if (str[0] == '/') { | 1320 | if (str[0] == '/') { |
1297 | free(redirected_path); | 1321 | free(redirected_path); |
1298 | target.path = redirected_path = xstrdup(str+1); | 1322 | target.path = redirected_path = xstrdup(str + 1); |
1299 | /* lsa stays the same: it's on the same server */ | 1323 | /* lsa stays the same: it's on the same server */ |
1300 | } else { | 1324 | } else { |
1301 | parse_url(str, &target); | 1325 | parse_url(str, &target); |
@@ -1342,7 +1366,7 @@ However, in real world it was observed that some web servers | |||
1342 | /* It's ftp. Close data connection properly */ | 1366 | /* It's ftp. Close data connection properly */ |
1343 | fclose(dfp); | 1367 | fclose(dfp); |
1344 | if (ftpcmd(NULL, NULL, sfp) != 226) | 1368 | if (ftpcmd(NULL, NULL, sfp) != 226) |
1345 | bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4)); | 1369 | bb_error_msg_and_die("ftp error: %s", G.wget_buf); |
1346 | /* ftpcmd("QUIT", NULL, sfp); - why bother? */ | 1370 | /* ftpcmd("QUIT", NULL, sfp); - why bother? */ |
1347 | } | 1371 | } |
1348 | fclose(sfp); | 1372 | fclose(sfp); |