aboutsummaryrefslogtreecommitdiff
path: root/networking/wget.c
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2009-06-28 03:33:57 +0200
committerDenys Vlasenko <vda.linux@googlemail.com>2009-06-28 03:33:57 +0200
commitf1fab0924285cca27903a1e4a4498c7205810742 (patch)
tree3b1224f5faeb82e4f9a1ba5d9adf29579a103e23 /networking/wget.c
parentab0c8d7b35fbbe3e038800587cde0e3a99907187 (diff)
downloadbusybox-w32-f1fab0924285cca27903a1e4a4498c7205810742.tar.gz
busybox-w32-f1fab0924285cca27903a1e4a4498c7205810742.tar.bz2
busybox-w32-f1fab0924285cca27903a1e4a4498c7205810742.zip
wget: fix redirection from HTTP to FTP server
While at it, sanitize redirection in general; add printout of every redirection hop; make sure we won't print any non-ASCII garbage from remote server in error messages.

function                                             old     new   delta
sanitize_string                                        -      14     +14
parse_url                                            294     301      +7
gethdr                                               190     197      +7
wget_main                                           2326    2331      +5
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 3/0 up/down: 33/0)               Total: 33 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'networking/wget.c')
-rw-r--r--networking/wget.c277
1 files changed, 140 insertions, 137 deletions
diff --git a/networking/wget.c b/networking/wget.c
index f826d1a8b..d518286a4 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -6,7 +6,6 @@
6 * 6 *
7 * Licensed under GPLv2, see file LICENSE in this tarball for details. 7 * Licensed under GPLv2, see file LICENSE in this tarball for details.
8 */ 8 */
9
10#include "libbb.h" 9#include "libbb.h"
11 10
12struct host_info { 11struct host_info {
@@ -239,6 +238,15 @@ static char *base64enc_512(char buf[512], const char *str)
239} 238}
240#endif 239#endif
241 240
241static char* sanitize_string(char *s)
242{
243 unsigned char *p = (void *) s;
244 while (*p >= ' ')
245 p++;
246 *p = '\0';
247 return s;
248}
249
242static FILE *open_socket(len_and_sockaddr *lsa) 250static FILE *open_socket(len_and_sockaddr *lsa)
243{ 251{
244 FILE *fp; 252 FILE *fp;
@@ -294,7 +302,7 @@ static void parse_url(char *src_url, struct host_info *h)
294 h->host = url + 6; 302 h->host = url + 6;
295 h->is_ftp = 1; 303 h->is_ftp = 1;
296 } else 304 } else
297 bb_error_msg_and_die("not an http or ftp url: %s", url); 305 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
298 306
299 // FYI: 307 // FYI:
300 // "Real" wget 'http://busybox.net?var=a/b' sends this request: 308 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
@@ -360,7 +368,7 @@ static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
360 368
361 /* verify we are at the end of the header name */ 369 /* verify we are at the end of the header name */
362 if (*s != ':') 370 if (*s != ':')
363 bb_error_msg_and_die("bad header line: %s", buf); 371 bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
364 372
365 /* locate the start of the header value */ 373 /* locate the start of the header value */
366 *s++ = '\0'; 374 *s++ = '\0';
@@ -433,7 +441,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
433 441
434 sfp = open_socket(lsa); 442 sfp = open_socket(lsa);
435 if (ftpcmd(NULL, NULL, sfp, buf) != 220) 443 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
436 bb_error_msg_and_die("%s", buf+4); 444 bb_error_msg_and_die("%s", sanitize_string(buf+4));
437 445
438 /* 446 /*
439 * Splitting username:password pair, 447 * Splitting username:password pair,
@@ -450,7 +458,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
450 break; 458 break;
451 /* fall through (failed login) */ 459 /* fall through (failed login) */
452 default: 460 default:
453 bb_error_msg_and_die("ftp login: %s", buf+4); 461 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
454 } 462 }
455 463
456 ftpcmd("TYPE I", NULL, sfp, buf); 464 ftpcmd("TYPE I", NULL, sfp, buf);
@@ -471,7 +479,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
471 */ 479 */
472 if (ftpcmd("PASV", NULL, sfp, buf) != 227) { 480 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
473 pasv_error: 481 pasv_error:
474 bb_error_msg_and_die("bad response to %s: %s", "PASV", buf); 482 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
475 } 483 }
476 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage] 484 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
477 // Server's IP is N1.N2.N3.N4 (we ignore it) 485 // Server's IP is N1.N2.N3.N4 (we ignore it)
@@ -496,7 +504,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
496 } 504 }
497 505
498 if (ftpcmd("RETR ", target->path, sfp, buf) > 150) 506 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
499 bb_error_msg_and_die("bad response to %s: %s", "RETR", buf); 507 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
500 508
501 return sfp; 509 return sfp;
502} 510}
@@ -574,6 +582,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
574 struct host_info server, target; 582 struct host_info server, target;
575 len_and_sockaddr *lsa; 583 len_and_sockaddr *lsa;
576 unsigned opt; 584 unsigned opt;
585 int redir_limit;
577 char *proxy = NULL; 586 char *proxy = NULL;
578 char *dir_prefix = NULL; 587 char *dir_prefix = NULL;
579#if ENABLE_FEATURE_WGET_LONG_OPTIONS 588#if ENABLE_FEATURE_WGET_LONG_OPTIONS
@@ -696,104 +705,91 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
696 * We are not sure it exists on remove side */ 705 * We are not sure it exists on remove side */
697 } 706 }
698 707
699 /* We want to do exactly _one_ DNS lookup, since some 708 redir_limit = 5;
700 * sites (i.e. ftp.us.debian.org) use round-robin DNS 709 resolve_lsa:
701 * and we want to connect to only one IP... */
702 lsa = xhost2sockaddr(server.host, server.port); 710 lsa = xhost2sockaddr(server.host, server.port);
703 if (!(opt & WGET_OPT_QUIET)) { 711 if (!(opt & WGET_OPT_QUIET)) {
704 fprintf(stderr, "Connecting to %s (%s)\n", server.host, 712 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
705 xmalloc_sockaddr2dotted(&lsa->u.sa)); 713 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
706 /* We leak result of xmalloc_sockaddr2dotted */ 714 free(s);
707 } 715 }
708 716 establish_session:
709 /* G.got_clen = 0; - already is */
710 sfp = NULL;
711 if (use_proxy || !target.is_ftp) { 717 if (use_proxy || !target.is_ftp) {
712 /* 718 /*
713 * HTTP session 719 * HTTP session
714 */ 720 */
721 char *str;
715 int status; 722 int status;
716 int try = 5;
717
718 do {
719 char *str;
720
721 G.got_clen = 0;
722 G.chunked = 0;
723
724 if (!--try)
725 bb_error_msg_and_die("too many redirections");
726
727 /* Open socket to http server */
728 if (sfp) fclose(sfp);
729 sfp = open_socket(lsa);
730
731 /* Send HTTP request */
732 if (use_proxy) {
733 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
734 target.is_ftp ? "f" : "ht", target.host,
735 target.path);
736 } else {
737 if (opt & WGET_OPT_POST_DATA)
738 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
739 else
740 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
741 }
742 723
743 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", 724 /* Open socket to http server */
744 target.host, user_agent); 725 sfp = open_socket(lsa);
726
727 /* Send HTTP request */
728 if (use_proxy) {
729 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
730 target.is_ftp ? "f" : "ht", target.host,
731 target.path);
732 } else {
733 if (opt & WGET_OPT_POST_DATA)
734 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
735 else
736 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
737 }
738
739 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
740 target.host, user_agent);
745 741
746#if ENABLE_FEATURE_WGET_AUTHENTICATION 742#if ENABLE_FEATURE_WGET_AUTHENTICATION
747 if (target.user) { 743 if (target.user) {
748 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, 744 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
749 base64enc_512(buf, target.user)); 745 base64enc_512(buf, target.user));
750 } 746 }
751 if (use_proxy && server.user) { 747 if (use_proxy && server.user) {
752 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", 748 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
753 base64enc_512(buf, server.user)); 749 base64enc_512(buf, server.user));
754 } 750 }
755#endif 751#endif
756 752
757 if (beg_range) 753 if (beg_range)
758 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range); 754 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
759#if ENABLE_FEATURE_WGET_LONG_OPTIONS 755#if ENABLE_FEATURE_WGET_LONG_OPTIONS
760 if (extra_headers) 756 if (extra_headers)
761 fputs(extra_headers, sfp); 757 fputs(extra_headers, sfp);
762 758
763 if (opt & WGET_OPT_POST_DATA) { 759 if (opt & WGET_OPT_POST_DATA) {
764 char *estr = URL_escape(post_data); 760 char *estr = URL_escape(post_data);
765 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n"); 761 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
766 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s", 762 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
767 (int) strlen(estr), estr); 763 (int) strlen(estr), estr);
768 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/ 764 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
769 /*fprintf(sfp, "%s\r\n", estr);*/ 765 /*fprintf(sfp, "%s\r\n", estr);*/
770 free(estr); 766 free(estr);
771 } else 767 } else
772#endif 768#endif
773 { /* If "Connection:" is needed, document why */ 769 { /* If "Connection:" is needed, document why */
774 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n"); 770 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
775 } 771 }
776 772
777 /* 773 /*
778 * Retrieve HTTP response line and check for "200" status code. 774 * Retrieve HTTP response line and check for "200" status code.
779 */ 775 */
780 read_response: 776 read_response:
781 if (fgets(buf, sizeof(buf), sfp) == NULL) 777 if (fgets(buf, sizeof(buf), sfp) == NULL)
782 bb_error_msg_and_die("no response from server"); 778 bb_error_msg_and_die("no response from server");
783 779
784 str = buf; 780 str = buf;
785 str = skip_non_whitespace(str); 781 str = skip_non_whitespace(str);
786 str = skip_whitespace(str); 782 str = skip_whitespace(str);
787 // FIXME: no error check 783 // FIXME: no error check
788 // xatou wouldn't work: "200 OK" 784 // xatou wouldn't work: "200 OK"
789 status = atoi(str); 785 status = atoi(str);
790 switch (status) { 786 switch (status) {
791 case 0: 787 case 0:
792 case 100: 788 case 100:
793 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL) 789 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
794 /* eat all remaining headers */; 790 /* eat all remaining headers */;
795 goto read_response; 791 goto read_response;
796 case 200: 792 case 200:
797/* 793/*
798Response 204 doesn't say "null file", it says "metadata 794Response 204 doesn't say "null file", it says "metadata
799has changed but data didn't": 795has changed but data didn't":
@@ -818,60 +814,66 @@ is always terminated by the first empty line after the header fields."
818However, in real world it was observed that some web servers 814However, in real world it was observed that some web servers
819(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. 815(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
820*/ 816*/
821 case 204: 817 case 204:
822 break; 818 break;
823 case 300: /* redirection */ 819 case 300: /* redirection */
824 case 301: 820 case 301:
825 case 302: 821 case 302:
826 case 303: 822 case 303:
823 break;
824 case 206:
825 if (beg_range)
827 break; 826 break;
828 case 206: 827 /* fall through */
829 if (beg_range) 828 default:
830 break; 829 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
831 /* fall through */ 830 }
832 default:
833 /* Show first line only and kill any ESC tricks */
834 buf[strcspn(buf, "\n\r\x1b")] = '\0';
835 bb_error_msg_and_die("server returned error: %s", buf);
836 }
837 831
838 /* 832 /*
839 * Retrieve HTTP headers. 833 * Retrieve HTTP headers.
840 */ 834 */
841 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) { 835 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
842 /* gethdr converted "FOO:" string to lowercase */ 836 /* gethdr converted "FOO:" string to lowercase */
843 smalluint key = index_in_strings(keywords, buf) + 1; 837 smalluint key = index_in_strings(keywords, buf) + 1;
844 if (key == KEY_content_length) { 838 if (key == KEY_content_length) {
845 content_len = BB_STRTOOFF(str, NULL, 10); 839 content_len = BB_STRTOOFF(str, NULL, 10);
846 if (errno || content_len < 0) { 840 if (errno || content_len < 0) {
847 bb_error_msg_and_die("content-length %s is garbage", str); 841 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
848 }
849 G.got_clen = 1;
850 continue;
851 }
852 if (key == KEY_transfer_encoding) {
853 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
854 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
855 G.chunked = G.got_clen = 1;
856 } 842 }
857 if (key == KEY_location) { 843 G.got_clen = 1;
858 if (str[0] == '/') 844 continue;
859 /* free(target.allocated); */ 845 }
860 target.path = /* target.allocated = */ xstrdup(str+1); 846 if (key == KEY_transfer_encoding) {
861 else { 847 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
862 parse_url(str, &target); 848 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
863 if (use_proxy == 0) { 849 G.chunked = G.got_clen = 1;
864 server.host = target.host; 850 }
865 server.port = target.port; 851 if (key == KEY_location && status >= 300) {
866 } 852 if (--redir_limit == 0)
853 bb_error_msg_and_die("too many redirections");
854 fclose(sfp);
855 G.got_clen = 0;
856 G.chunked = 0;
857 if (str[0] == '/')
858 /* free(target.allocated); */
859 target.path = /* target.allocated = */ xstrdup(str+1);
860 /* lsa stays the same: it's on the same server */
861 else {
862 parse_url(str, &target);
863 if (!use_proxy) {
864 server.host = target.host;
865 server.port = target.port;
867 free(lsa); 866 free(lsa);
868 lsa = xhost2sockaddr(server.host, server.port); 867 goto resolve_lsa;
869 break; 868 } /* else: lsa stays the same: we use proxy */
870 }
871 } 869 }
870 goto establish_session;
872 } 871 }
873 } while (status >= 300); 872 }
873// if (status >= 300)
874// bb_error_msg_and_die("bad redirection (no Location: header from server)");
874 875
876 /* For HTTP, data is pumped over the same connection */
875 dfp = sfp; 877 dfp = sfp;
876 878
877 } else { 879 } else {
@@ -897,10 +899,11 @@ However, in real world it was observed that some web servers
897 899
898 retrieve_file_data(dfp, output_fd); 900 retrieve_file_data(dfp, output_fd);
899 901
900 if ((use_proxy == 0) && target.is_ftp) { 902 if (dfp != sfp) {
903 /* It's ftp. Close it properly */
901 fclose(dfp); 904 fclose(dfp);
902 if (ftpcmd(NULL, NULL, sfp, buf) != 226) 905 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
903 bb_error_msg_and_die("ftp error: %s", buf+4); 906 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
904 ftpcmd("QUIT", NULL, sfp, buf); 907 ftpcmd("QUIT", NULL, sfp, buf);
905 } 908 }
906 909