diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2009-06-28 03:33:57 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2009-06-28 03:33:57 +0200 |
commit | f1fab0924285cca27903a1e4a4498c7205810742 (patch) | |
tree | 3b1224f5faeb82e4f9a1ba5d9adf29579a103e23 /networking/wget.c | |
parent | ab0c8d7b35fbbe3e038800587cde0e3a99907187 (diff) | |
download | busybox-w32-f1fab0924285cca27903a1e4a4498c7205810742.tar.gz busybox-w32-f1fab0924285cca27903a1e4a4498c7205810742.tar.bz2 busybox-w32-f1fab0924285cca27903a1e4a4498c7205810742.zip |
wget: fix redirection from HTTP to FTP server
while at it, sanitize redirection in general; add printout
of every redirection hop; make sure we won't print any non-ASCII
garbage from remote server in error messages.
function old new delta
sanitize_string - 14 +14
parse_url 294 301 +7
gethdr 190 197 +7
wget_main 2326 2331 +5
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 3/0 up/down: 33/0) Total: 33 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'networking/wget.c')
-rw-r--r-- | networking/wget.c | 277 |
1 files changed, 140 insertions, 137 deletions
diff --git a/networking/wget.c b/networking/wget.c index f826d1a8b..d518286a4 100644 --- a/networking/wget.c +++ b/networking/wget.c | |||
@@ -6,7 +6,6 @@ | |||
6 | * | 6 | * |
7 | * Licensed under GPLv2, see file LICENSE in this tarball for details. | 7 | * Licensed under GPLv2, see file LICENSE in this tarball for details. |
8 | */ | 8 | */ |
9 | |||
10 | #include "libbb.h" | 9 | #include "libbb.h" |
11 | 10 | ||
12 | struct host_info { | 11 | struct host_info { |
@@ -239,6 +238,15 @@ static char *base64enc_512(char buf[512], const char *str) | |||
239 | } | 238 | } |
240 | #endif | 239 | #endif |
241 | 240 | ||
241 | static char* sanitize_string(char *s) | ||
242 | { | ||
243 | unsigned char *p = (void *) s; | ||
244 | while (*p >= ' ') | ||
245 | p++; | ||
246 | *p = '\0'; | ||
247 | return s; | ||
248 | } | ||
249 | |||
242 | static FILE *open_socket(len_and_sockaddr *lsa) | 250 | static FILE *open_socket(len_and_sockaddr *lsa) |
243 | { | 251 | { |
244 | FILE *fp; | 252 | FILE *fp; |
@@ -294,7 +302,7 @@ static void parse_url(char *src_url, struct host_info *h) | |||
294 | h->host = url + 6; | 302 | h->host = url + 6; |
295 | h->is_ftp = 1; | 303 | h->is_ftp = 1; |
296 | } else | 304 | } else |
297 | bb_error_msg_and_die("not an http or ftp url: %s", url); | 305 | bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url)); |
298 | 306 | ||
299 | // FYI: | 307 | // FYI: |
300 | // "Real" wget 'http://busybox.net?var=a/b' sends this request: | 308 | // "Real" wget 'http://busybox.net?var=a/b' sends this request: |
@@ -360,7 +368,7 @@ static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/) | |||
360 | 368 | ||
361 | /* verify we are at the end of the header name */ | 369 | /* verify we are at the end of the header name */ |
362 | if (*s != ':') | 370 | if (*s != ':') |
363 | bb_error_msg_and_die("bad header line: %s", buf); | 371 | bb_error_msg_and_die("bad header line: %s", sanitize_string(buf)); |
364 | 372 | ||
365 | /* locate the start of the header value */ | 373 | /* locate the start of the header value */ |
366 | *s++ = '\0'; | 374 | *s++ = '\0'; |
@@ -433,7 +441,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ | |||
433 | 441 | ||
434 | sfp = open_socket(lsa); | 442 | sfp = open_socket(lsa); |
435 | if (ftpcmd(NULL, NULL, sfp, buf) != 220) | 443 | if (ftpcmd(NULL, NULL, sfp, buf) != 220) |
436 | bb_error_msg_and_die("%s", buf+4); | 444 | bb_error_msg_and_die("%s", sanitize_string(buf+4)); |
437 | 445 | ||
438 | /* | 446 | /* |
439 | * Splitting username:password pair, | 447 | * Splitting username:password pair, |
@@ -450,7 +458,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ | |||
450 | break; | 458 | break; |
451 | /* fall through (failed login) */ | 459 | /* fall through (failed login) */ |
452 | default: | 460 | default: |
453 | bb_error_msg_and_die("ftp login: %s", buf+4); | 461 | bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4)); |
454 | } | 462 | } |
455 | 463 | ||
456 | ftpcmd("TYPE I", NULL, sfp, buf); | 464 | ftpcmd("TYPE I", NULL, sfp, buf); |
@@ -471,7 +479,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ | |||
471 | */ | 479 | */ |
472 | if (ftpcmd("PASV", NULL, sfp, buf) != 227) { | 480 | if (ftpcmd("PASV", NULL, sfp, buf) != 227) { |
473 | pasv_error: | 481 | pasv_error: |
474 | bb_error_msg_and_die("bad response to %s: %s", "PASV", buf); | 482 | bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf)); |
475 | } | 483 | } |
476 | // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage] | 484 | // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage] |
477 | // Server's IP is N1.N2.N3.N4 (we ignore it) | 485 | // Server's IP is N1.N2.N3.N4 (we ignore it) |
@@ -496,7 +504,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ | |||
496 | } | 504 | } |
497 | 505 | ||
498 | if (ftpcmd("RETR ", target->path, sfp, buf) > 150) | 506 | if (ftpcmd("RETR ", target->path, sfp, buf) > 150) |
499 | bb_error_msg_and_die("bad response to %s: %s", "RETR", buf); | 507 | bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf)); |
500 | 508 | ||
501 | return sfp; | 509 | return sfp; |
502 | } | 510 | } |
@@ -574,6 +582,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv) | |||
574 | struct host_info server, target; | 582 | struct host_info server, target; |
575 | len_and_sockaddr *lsa; | 583 | len_and_sockaddr *lsa; |
576 | unsigned opt; | 584 | unsigned opt; |
585 | int redir_limit; | ||
577 | char *proxy = NULL; | 586 | char *proxy = NULL; |
578 | char *dir_prefix = NULL; | 587 | char *dir_prefix = NULL; |
579 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS | 588 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS |
@@ -696,104 +705,91 @@ int wget_main(int argc UNUSED_PARAM, char **argv) | |||
696 | * We are not sure it exists on remove side */ | 705 | * We are not sure it exists on remove side */ |
697 | } | 706 | } |
698 | 707 | ||
699 | /* We want to do exactly _one_ DNS lookup, since some | 708 | redir_limit = 5; |
700 | * sites (i.e. ftp.us.debian.org) use round-robin DNS | 709 | resolve_lsa: |
701 | * and we want to connect to only one IP... */ | ||
702 | lsa = xhost2sockaddr(server.host, server.port); | 710 | lsa = xhost2sockaddr(server.host, server.port); |
703 | if (!(opt & WGET_OPT_QUIET)) { | 711 | if (!(opt & WGET_OPT_QUIET)) { |
704 | fprintf(stderr, "Connecting to %s (%s)\n", server.host, | 712 | char *s = xmalloc_sockaddr2dotted(&lsa->u.sa); |
705 | xmalloc_sockaddr2dotted(&lsa->u.sa)); | 713 | fprintf(stderr, "Connecting to %s (%s)\n", server.host, s); |
706 | /* We leak result of xmalloc_sockaddr2dotted */ | 714 | free(s); |
707 | } | 715 | } |
708 | 716 | establish_session: | |
709 | /* G.got_clen = 0; - already is */ | ||
710 | sfp = NULL; | ||
711 | if (use_proxy || !target.is_ftp) { | 717 | if (use_proxy || !target.is_ftp) { |
712 | /* | 718 | /* |
713 | * HTTP session | 719 | * HTTP session |
714 | */ | 720 | */ |
721 | char *str; | ||
715 | int status; | 722 | int status; |
716 | int try = 5; | ||
717 | |||
718 | do { | ||
719 | char *str; | ||
720 | |||
721 | G.got_clen = 0; | ||
722 | G.chunked = 0; | ||
723 | |||
724 | if (!--try) | ||
725 | bb_error_msg_and_die("too many redirections"); | ||
726 | |||
727 | /* Open socket to http server */ | ||
728 | if (sfp) fclose(sfp); | ||
729 | sfp = open_socket(lsa); | ||
730 | |||
731 | /* Send HTTP request */ | ||
732 | if (use_proxy) { | ||
733 | fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n", | ||
734 | target.is_ftp ? "f" : "ht", target.host, | ||
735 | target.path); | ||
736 | } else { | ||
737 | if (opt & WGET_OPT_POST_DATA) | ||
738 | fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path); | ||
739 | else | ||
740 | fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path); | ||
741 | } | ||
742 | 723 | ||
743 | fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", | 724 | /* Open socket to http server */ |
744 | target.host, user_agent); | 725 | sfp = open_socket(lsa); |
726 | |||
727 | /* Send HTTP request */ | ||
728 | if (use_proxy) { | ||
729 | fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n", | ||
730 | target.is_ftp ? "f" : "ht", target.host, | ||
731 | target.path); | ||
732 | } else { | ||
733 | if (opt & WGET_OPT_POST_DATA) | ||
734 | fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path); | ||
735 | else | ||
736 | fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path); | ||
737 | } | ||
738 | |||
739 | fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", | ||
740 | target.host, user_agent); | ||
745 | 741 | ||
746 | #if ENABLE_FEATURE_WGET_AUTHENTICATION | 742 | #if ENABLE_FEATURE_WGET_AUTHENTICATION |
747 | if (target.user) { | 743 | if (target.user) { |
748 | fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, | 744 | fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, |
749 | base64enc_512(buf, target.user)); | 745 | base64enc_512(buf, target.user)); |
750 | } | 746 | } |
751 | if (use_proxy && server.user) { | 747 | if (use_proxy && server.user) { |
752 | fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", | 748 | fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", |
753 | base64enc_512(buf, server.user)); | 749 | base64enc_512(buf, server.user)); |
754 | } | 750 | } |
755 | #endif | 751 | #endif |
756 | 752 | ||
757 | if (beg_range) | 753 | if (beg_range) |
758 | fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range); | 754 | fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range); |
759 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS | 755 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS |
760 | if (extra_headers) | 756 | if (extra_headers) |
761 | fputs(extra_headers, sfp); | 757 | fputs(extra_headers, sfp); |
762 | 758 | ||
763 | if (opt & WGET_OPT_POST_DATA) { | 759 | if (opt & WGET_OPT_POST_DATA) { |
764 | char *estr = URL_escape(post_data); | 760 | char *estr = URL_escape(post_data); |
765 | fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n"); | 761 | fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n"); |
766 | fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s", | 762 | fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s", |
767 | (int) strlen(estr), estr); | 763 | (int) strlen(estr), estr); |
768 | /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/ | 764 | /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/ |
769 | /*fprintf(sfp, "%s\r\n", estr);*/ | 765 | /*fprintf(sfp, "%s\r\n", estr);*/ |
770 | free(estr); | 766 | free(estr); |
771 | } else | 767 | } else |
772 | #endif | 768 | #endif |
773 | { /* If "Connection:" is needed, document why */ | 769 | { /* If "Connection:" is needed, document why */ |
774 | fprintf(sfp, /* "Connection: close\r\n" */ "\r\n"); | 770 | fprintf(sfp, /* "Connection: close\r\n" */ "\r\n"); |
775 | } | 771 | } |
776 | 772 | ||
777 | /* | 773 | /* |
778 | * Retrieve HTTP response line and check for "200" status code. | 774 | * Retrieve HTTP response line and check for "200" status code. |
779 | */ | 775 | */ |
780 | read_response: | 776 | read_response: |
781 | if (fgets(buf, sizeof(buf), sfp) == NULL) | 777 | if (fgets(buf, sizeof(buf), sfp) == NULL) |
782 | bb_error_msg_and_die("no response from server"); | 778 | bb_error_msg_and_die("no response from server"); |
783 | 779 | ||
784 | str = buf; | 780 | str = buf; |
785 | str = skip_non_whitespace(str); | 781 | str = skip_non_whitespace(str); |
786 | str = skip_whitespace(str); | 782 | str = skip_whitespace(str); |
787 | // FIXME: no error check | 783 | // FIXME: no error check |
788 | // xatou wouldn't work: "200 OK" | 784 | // xatou wouldn't work: "200 OK" |
789 | status = atoi(str); | 785 | status = atoi(str); |
790 | switch (status) { | 786 | switch (status) { |
791 | case 0: | 787 | case 0: |
792 | case 100: | 788 | case 100: |
793 | while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL) | 789 | while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL) |
794 | /* eat all remaining headers */; | 790 | /* eat all remaining headers */; |
795 | goto read_response; | 791 | goto read_response; |
796 | case 200: | 792 | case 200: |
797 | /* | 793 | /* |
798 | Response 204 doesn't say "null file", it says "metadata | 794 | Response 204 doesn't say "null file", it says "metadata |
799 | has changed but data didn't": | 795 | has changed but data didn't": |
@@ -818,60 +814,66 @@ is always terminated by the first empty line after the header fields." | |||
818 | However, in real world it was observed that some web servers | 814 | However, in real world it was observed that some web servers |
819 | (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. | 815 | (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. |
820 | */ | 816 | */ |
821 | case 204: | 817 | case 204: |
822 | break; | 818 | break; |
823 | case 300: /* redirection */ | 819 | case 300: /* redirection */ |
824 | case 301: | 820 | case 301: |
825 | case 302: | 821 | case 302: |
826 | case 303: | 822 | case 303: |
823 | break; | ||
824 | case 206: | ||
825 | if (beg_range) | ||
827 | break; | 826 | break; |
828 | case 206: | 827 | /* fall through */ |
829 | if (beg_range) | 828 | default: |
830 | break; | 829 | bb_error_msg_and_die("server returned error: %s", sanitize_string(buf)); |
831 | /* fall through */ | 830 | } |
832 | default: | ||
833 | /* Show first line only and kill any ESC tricks */ | ||
834 | buf[strcspn(buf, "\n\r\x1b")] = '\0'; | ||
835 | bb_error_msg_and_die("server returned error: %s", buf); | ||
836 | } | ||
837 | 831 | ||
838 | /* | 832 | /* |
839 | * Retrieve HTTP headers. | 833 | * Retrieve HTTP headers. |
840 | */ | 834 | */ |
841 | while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) { | 835 | while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) { |
842 | /* gethdr converted "FOO:" string to lowercase */ | 836 | /* gethdr converted "FOO:" string to lowercase */ |
843 | smalluint key = index_in_strings(keywords, buf) + 1; | 837 | smalluint key = index_in_strings(keywords, buf) + 1; |
844 | if (key == KEY_content_length) { | 838 | if (key == KEY_content_length) { |
845 | content_len = BB_STRTOOFF(str, NULL, 10); | 839 | content_len = BB_STRTOOFF(str, NULL, 10); |
846 | if (errno || content_len < 0) { | 840 | if (errno || content_len < 0) { |
847 | bb_error_msg_and_die("content-length %s is garbage", str); | 841 | bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str)); |
848 | } | ||
849 | G.got_clen = 1; | ||
850 | continue; | ||
851 | } | ||
852 | if (key == KEY_transfer_encoding) { | ||
853 | if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked) | ||
854 | bb_error_msg_and_die("transfer encoding '%s' is not supported", str); | ||
855 | G.chunked = G.got_clen = 1; | ||
856 | } | 842 | } |
857 | if (key == KEY_location) { | 843 | G.got_clen = 1; |
858 | if (str[0] == '/') | 844 | continue; |
859 | /* free(target.allocated); */ | 845 | } |
860 | target.path = /* target.allocated = */ xstrdup(str+1); | 846 | if (key == KEY_transfer_encoding) { |
861 | else { | 847 | if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked) |
862 | parse_url(str, &target); | 848 | bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str)); |
863 | if (use_proxy == 0) { | 849 | G.chunked = G.got_clen = 1; |
864 | server.host = target.host; | 850 | } |
865 | server.port = target.port; | 851 | if (key == KEY_location && status >= 300) { |
866 | } | 852 | if (--redir_limit == 0) |
853 | bb_error_msg_and_die("too many redirections"); | ||
854 | fclose(sfp); | ||
855 | G.got_clen = 0; | ||
856 | G.chunked = 0; | ||
857 | if (str[0] == '/') | ||
858 | /* free(target.allocated); */ | ||
859 | target.path = /* target.allocated = */ xstrdup(str+1); | ||
860 | /* lsa stays the same: it's on the same server */ | ||
861 | else { | ||
862 | parse_url(str, &target); | ||
863 | if (!use_proxy) { | ||
864 | server.host = target.host; | ||
865 | server.port = target.port; | ||
867 | free(lsa); | 866 | free(lsa); |
868 | lsa = xhost2sockaddr(server.host, server.port); | 867 | goto resolve_lsa; |
869 | break; | 868 | } /* else: lsa stays the same: we use proxy */ |
870 | } | ||
871 | } | 869 | } |
870 | goto establish_session; | ||
872 | } | 871 | } |
873 | } while (status >= 300); | 872 | } |
873 | // if (status >= 300) | ||
874 | // bb_error_msg_and_die("bad redirection (no Location: header from server)"); | ||
874 | 875 | ||
876 | /* For HTTP, data is pumped over the same connection */ | ||
875 | dfp = sfp; | 877 | dfp = sfp; |
876 | 878 | ||
877 | } else { | 879 | } else { |
@@ -897,10 +899,11 @@ However, in real world it was observed that some web servers | |||
897 | 899 | ||
898 | retrieve_file_data(dfp, output_fd); | 900 | retrieve_file_data(dfp, output_fd); |
899 | 901 | ||
900 | if ((use_proxy == 0) && target.is_ftp) { | 902 | if (dfp != sfp) { |
903 | /* It's ftp. Close it properly */ | ||
901 | fclose(dfp); | 904 | fclose(dfp); |
902 | if (ftpcmd(NULL, NULL, sfp, buf) != 226) | 905 | if (ftpcmd(NULL, NULL, sfp, buf) != 226) |
903 | bb_error_msg_and_die("ftp error: %s", buf+4); | 906 | bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4)); |
904 | ftpcmd("QUIT", NULL, sfp, buf); | 907 | ftpcmd("QUIT", NULL, sfp, buf); |
905 | } | 908 | } |
906 | 909 | ||