/** * @PROJECT WebGet Downloader * @COPYRIGHT See COPYING in the top level directory * @FILE libweb.c * @PURPOSE Common Web Library * @DEVELOPERS Al Globus * Rafal Kupiec * Jef Poskanzer */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CROSS_BUILD #include #endif #include "libweb.h" int b64_encode(unsigned char* ptr, int len, char* space, int size) { int ptr_idx, space_idx, phase; char c; space_idx = 0; phase = 0; for(ptr_idx = 0; ptr_idx < len; ++ptr_idx) { switch(phase) { case 0: c = b64_encode_table[ptr[ptr_idx] >> 2]; if(space_idx < size) { space[space_idx++] = c; } c = b64_encode_table[(ptr[ptr_idx] & 0x3) << 4]; if(space_idx < size) { space[space_idx++] = c; } ++phase; break; case 1: space[space_idx - 1] = b64_encode_table[b64_decode_table[(int) ((unsigned char) space[space_idx - 1])] | (ptr[ptr_idx] >> 4)]; c = b64_encode_table[(ptr[ptr_idx] & 0xf) << 2]; if(space_idx < size) { space[space_idx++] = c; } ++phase; break; case 2: space[space_idx - 1] = b64_encode_table[b64_decode_table[(int) ((unsigned char) space[space_idx - 1])] | (ptr[ptr_idx] >> 6)]; c = b64_encode_table[ptr[ptr_idx] & 0x3f]; if(space_idx < size) { space[space_idx++] = c; } phase = 0; break; } } while(phase++ < 3) { if(space_idx < size) { space[space_idx++] = '='; } } return space_idx; } void check(void* ptr) { if(ptr == (void*) 0) { (void) fprintf(stderr, "%s: out of memory\n", argv0); exit(1); } } off_t file_bytes(const char* filename) { struct stat sb; if(stat(filename, &sb) < 0) { perror(filename); exit(1); } return sb.st_size; } int file_copy(const char* filename, char* buf) { int fd; struct stat sb; off_t bytes; fd = open(filename, O_RDONLY); if(fd == -1) { perror(filename); exit(-1); } if(fstat(fd, &sb) != 0) { perror(filename); exit(-1); } bytes = sb.st_size; if(read(fd, buf, bytes) != bytes) { perror(filename); exit(-1); } (void) close(fd); return bytes; } int getURL(char* url, char* referer, char* user_agent, char* auth_token, int ncookies, char** cookies, char* header_name, char* header_value) { char* s; int protocol; char host[2000]; int host_len; unsigned short port; char* file = (char*) 0; char* http = "http://"; int http_len = strlen( http ); char* https = "https://"; int https_len = strlen( https ); int proto_len; if(url == (char*) 0) { (void) fprintf(stderr, "%s: null URL\n", argv0); exit(1); } if(strncmp(http, url, http_len) == 0) { proto_len = http_len; protocol = PROTO_HTTP; } else if(strncmp(https, url, https_len) == 0) { proto_len = https_len; protocol = PROTO_HTTPS; } else { (void) fprintf(stderr, "%s: non-http URL\n", argv0); exit(1); } for(s = url + proto_len; *s != '\0' && *s != ':' && *s != '/'; ++s); host_len = s - url; host_len -= proto_len; strncpy(host, url + proto_len, host_len); host[host_len] = '\0'; if(*s == ':') { port = (unsigned short) atoi(++s); while(*s != '\0' && *s != '/') ++s; } else { if(protocol == PROTO_HTTPS) { port = 443; } else { port = 80; } } if(*s == '\0') { file = "/"; } else { file = s; } return getURLbyParts(protocol, host, port, file, referer, user_agent, auth_token, ncookies, cookies, header_name, header_value); } int getURLbyParts(int protocol, char* host, unsigned short port, char* file, char* referer, char* user_agent, char* auth_token, int ncookies, char** cookies, char* header_name, char* header_value) { int sockfd; SSL_CTX* ssl_ctx = (SSL_CTX*) 0; SSL* ssl = (SSL*) 0; char buf[20000]; int i, bytes, b, header_state, status; (void) alarm(timeout); sockfd = open_client_socket(host, port); if(protocol == PROTO_HTTPS) { int r; ssl_ctx = SSL_CTX_new(TLSv1_client_method()); SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_NONE, 0); ssl = SSL_new(ssl_ctx); SSL_set_fd(ssl, sockfd); r = SSL_connect(ssl); if(r <= 0) { (void) fprintf(stderr, "%s: %s - SSL connection failed - %d\n", argv0, url, r); exit(1); } } (void) alarm(timeout); bytes = snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n", file); bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "Host: %s\r\n", host); if(referer != (char*) 0) { bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "Referer: %s\r\n", referer); } bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "User-Agent: %s\r\n", user_agent); bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "Accept: */*\r\n"); bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "Accept-Language: en\r\n"); bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "Accept-Charset: iso-8859-1,*,utf-8\r\n"); if(auth_token != (char*) 0) { char token_buf[1000]; token_buf[b64_encode((unsigned char*) auth_token, strlen( auth_token ), token_buf, sizeof(token_buf))] = '\0'; bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "Authorization: Basic %s\r\n", token_buf); } for(i = 0; i < ncookies; ++i) { bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "Cookie: %s\r\n", cookies[i]); } if(header_name != (char*) 0) { bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "%s: %s\r\n", header_name, header_value); } bytes += snprintf(&buf[bytes], sizeof(buf) - bytes, "\r\n"); if (protocol == PROTO_HTTPS) (void) SSL_write(ssl, buf, bytes); else (void) write(sockfd, buf, bytes); (void) alarm(timeout); header_state = HDST_LINE1_PROTOCOL; status = 0; for(;;) { if(protocol == PROTO_HTTPS) { bytes = SSL_read(ssl, buf, sizeof(buf)); } else { bytes = read(sockfd, buf, sizeof(buf)); } if(bytes <= 0) { break; } for(b = 0; b < bytes; ++b) { if(verbose) { (void) write(1, &buf[b], 1); } switch(header_state) { case HDST_LINE1_PROTOCOL: switch(buf[b]) { case ' ': case '\t': header_state = HDST_LINE1_WHITESPACE; break; case '\n': header_state = HDST_LF; break; case '\r': header_state = HDST_CR; break; } break; case HDST_LINE1_WHITESPACE: switch(buf[b]) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': status = buf[b] - '0'; header_state = HDST_LINE1_STATUS; break; case '\n': header_state = HDST_LF; break; case '\r': header_state = HDST_CR; break; default: header_state = HDST_TEXT; break; } break; case HDST_LINE1_STATUS: switch(buf[b]) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': status = status * 10 + buf[b] - '0'; break; case '\n': header_state = HDST_LF; break; case '\r': header_state = HDST_CR; break; default: header_state = HDST_TEXT; break; } break; case HDST_BOL: switch(buf[b]) { case '\n': header_state = HDST_LF; break; case '\r': header_state = HDST_CR; break; default: header_state = HDST_TEXT; break; } break; case HDST_TEXT: switch(buf[b]) { case '\n': header_state = HDST_LF; break; case '\r': header_state = HDST_CR; break; } break; case HDST_LF: switch(buf[b]) { case '\n': goto end_of_headers; case '\r': header_state = HDST_CR; break; default: header_state = HDST_TEXT; break; } break; case HDST_CR: switch(buf[b]) { case '\n': header_state = HDST_CRLF; break; case '\r': goto end_of_headers; default: header_state = HDST_TEXT; break; } break; case HDST_CRLF: switch(buf[b]) { case '\n': goto end_of_headers; case '\r': header_state = HDST_CRLFCR; break; default: header_state = HDST_TEXT; break; } break; case HDST_CRLFCR: switch(buf[b]) { case '\n': case '\r': goto end_of_headers; default: header_state = HDST_TEXT; break; } break; } } } end_of_headers: if(bytes > 0) { ++b; (void) write(1, &buf[b], bytes - b); } for(;;) { (void) alarm(timeout); if(protocol == PROTO_HTTPS) { bytes = SSL_read(ssl, buf, sizeof(buf)); } else { bytes = read(sockfd, buf, sizeof(buf)); } if(bytes == 0) { break; } if(bytes < 0) { show_error("read"); } (void) write(1, buf, bytes); } if(protocol == PROTO_HTTPS) { SSL_free(ssl); SSL_CTX_free(ssl_ctx); } (void) close(sockfd); return status; } void* malloc_check(size_t size) { void* ptr = malloc(size); check(ptr); return ptr; } int open_client_socket(char* hostname, unsigned short port) { struct addrinfo hints; char portstr[10]; int gaierr; struct addrinfo* ai; struct addrinfo* ai2; struct addrinfo* aiv4; struct addrinfo* aiv6; struct sockaddr_in6 sa; int sa_len, sock_family, sock_type, sock_protocol; int sockfd; (void) memset((void*) &sa, 0, sizeof(sa)); (void) memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; (void) snprintf(portstr, sizeof(portstr), "%d", (int) port); if((gaierr = getaddrinfo(hostname, portstr, &hints, &ai)) != 0) { (void) fprintf(stderr, "%s: getaddrinfo %s - %s\n", argv0, hostname, gai_strerror(gaierr)); exit(1); } aiv4 = (struct addrinfo*) 0; aiv6 = (struct addrinfo*) 0; for(ai2 = ai; ai2 != (struct addrinfo*) 0; ai2 = ai2->ai_next) { switch(ai2->ai_family) { case AF_INET: if(aiv4 == (struct addrinfo*) 0) { aiv4 = ai2; } break; case AF_INET6: if(aiv6 == (struct addrinfo*) 0) { aiv6 = ai2; } break; } } if(aiv4 != (struct addrinfo*) 0) { if(sizeof(sa) < aiv4->ai_addrlen) { (void) fprintf(stderr, "%s - sockaddr too small (%lu < %lu)\n", hostname, (unsigned long) sizeof(sa), (unsigned long) aiv4->ai_addrlen); exit(1); } sock_family = aiv4->ai_family; sock_type = aiv4->ai_socktype; sock_protocol = aiv4->ai_protocol; sa_len = aiv4->ai_addrlen; (void) memmove(&sa, aiv4->ai_addr, sa_len); goto ok; } if(aiv6 != (struct addrinfo*) 0) { if(sizeof(sa) < aiv6->ai_addrlen) { (void) fprintf(stderr, "%s - sockaddr too small (%lu < %lu)\n", hostname, (unsigned long) sizeof(sa), (unsigned long) aiv6->ai_addrlen); exit(1); } sock_family = aiv6->ai_family; sock_type = aiv6->ai_socktype; sock_protocol = aiv6->ai_protocol; sa_len = aiv6->ai_addrlen; (void) memmove(&sa, aiv6->ai_addr, sa_len); goto ok; } (void) fprintf(stderr, "%s: no valid address found for host %s\n", argv0, hostname); exit(1); ok: freeaddrinfo(ai); sockfd = socket(sock_family, sock_type, sock_protocol); if(sockfd < 0) { show_error("socket"); } if(connect(sockfd, (struct sockaddr*) &sa, sa_len) < 0) { show_error("connect"); } return sockfd; } void postURL(char* url, char* referer, char* user_agent, char* auth_token, int ncookies, char** cookies, char* header_name, char* header_value, char** args, int argc) { char* s; int protocol; char host[2000]; int host_len; unsigned short port; char* file = 0; char* http = "http://"; int http_len = strlen(http); char* https = "https://"; int https_len = strlen(https); int proto_len; if(url == (char*) 0) { (void) fprintf(stderr, "%s: null URL\n", argv0); exit(1); } if(strncmp(http, url, http_len) == 0) { proto_len = http_len; protocol = PROTO_HTTP; } else if(strncmp(https, url, https_len) == 0) { proto_len = https_len; protocol = PROTO_HTTPS; } else { (void) fprintf(stderr, "%s: non-http URL\n", argv0); exit(1); } for(s = url + proto_len; *s != '\0' && *s != ':' && *s != '/'; ++s); host_len = s - url; host_len -= proto_len; strncpy(host, url + proto_len, host_len); host[host_len] = '\0'; if(*s == ':') { port = (unsigned short) atoi(++s); while(*s != '\0' && *s != '/') { ++s; } } else { if(protocol == PROTO_HTTPS) { port = 443; } else { port = 80; } } if(*s == '\0') { file = "/"; } else { file = s; } postURLbyParts(protocol, host, port, file, referer, user_agent, auth_token, ncookies, cookies, header_name, header_value, args, argc); } void postURLbyParts(int protocol, char* host, unsigned short port, char* file, char* referer, char* user_agent, char* auth_token, int ncookies, char** cookies, char* header_name, char* header_value, char** args, int argc) { int sockfd; SSL_CTX* ssl_ctx = (SSL_CTX*) 0; SSL* ssl = (SSL*) 0; char head_buf[20000]; int max_arg, total_bytes; int multipart, next_arg_is_file; static const char* const sep = "http_post-content-separator"; char* data_buf; char* enc_buf; int head_bytes, data_bytes, i, header_state; char* eq; (void) alarm(timeout); sockfd = open_client_socket(host, port); if(protocol == PROTO_HTTPS) { int r; ssl_ctx = SSL_CTX_new(TLSv1_client_method()); SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_NONE, 0); ssl = SSL_new(ssl_ctx); SSL_set_fd(ssl, sockfd); r = SSL_connect(ssl); if(r <= 0) { (void) fprintf(stderr, "%s: %s - SSL connection failed - %d\n", argv0, url, r); exit(1); } } multipart = 0; total_bytes = max_arg = 0; next_arg_is_file = 0; for(i = 0; i < argc ; ++i) { int l = strlen(args[i]); if(strcmp(args[i], "-f") == 0) { multipart = 1; next_arg_is_file = 1; continue; } total_bytes += l; if(l > max_arg) { max_arg = l; } if(next_arg_is_file) { eq = strchr(args[i], '='); if(eq == (char*) 0) { (void) fprintf(stderr, "%s: missing filename\n", argv0); exit(1); } else { ++eq; total_bytes += file_bytes(eq); } next_arg_is_file = 0; } } if(multipart) { for(i = 0; i < argc ; ++i) { total_bytes += strlen(sep) * 2 + 100; } total_bytes += strlen(sep) * 2; } else { total_bytes *= 4; enc_buf = (char*) malloc_check(max_arg * 4); } data_buf = (char*) malloc_check(total_bytes); if(multipart) { next_arg_is_file = 0; data_bytes = 0; for(i = 0; i < argc; ++i) { if(strcmp(args[i], "-f") == 0) { next_arg_is_file = 1; continue; } eq = strchr(args[i], '='); if(eq == (char*) 0) { data_bytes += sprintf(&data_buf[data_bytes], "--%s\r\nContent-Disposition: form-data\r\n\r\n%s\r\n", sep, args[i]); } else { *eq++ = '\0'; if(next_arg_is_file) { data_bytes += sprintf(&data_buf[data_bytes], "--%s\r\nContent-Disposition: form-data; name=\"%s\"; filename=\"%s\"\r\n\r\n", sep, args[i], eq); data_bytes += file_copy(eq, &data_buf[data_bytes]); data_bytes += sprintf(&data_buf[data_bytes], "\r\n"); next_arg_is_file = 0; } else { data_bytes += sprintf(&data_buf[data_bytes], "--%s\r\nContent-Disposition: form-data; name=\"%s\"\r\n\r\n%s\r\n", sep, args[i], eq); } } } data_bytes += sprintf(&data_buf[data_bytes], "--%s--\r\n", sep); } else { data_bytes = 0; for(i = 0; i < argc ; ++i) { if(data_bytes > 0) { data_bytes += sprintf(&data_buf[data_bytes], "&"); } eq = strchr(args[i], '='); if(eq == (char*) 0) { strencode(enc_buf, args[i]); data_bytes += sprintf(&data_buf[data_bytes], "%s", enc_buf); } else { *eq++ = '\0'; strencode(enc_buf, args[i]); data_bytes += sprintf(&data_buf[data_bytes], "%s=", enc_buf); strencode(enc_buf, eq); data_bytes += sprintf(&data_buf[data_bytes], "%s", enc_buf); } } } (void) alarm(timeout); head_bytes = snprintf(head_buf, sizeof(head_buf), "POST %s HTTP/1.0\r\n", file); head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Host: %s\r\n", host); head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Content-Length: %d\r\n", data_bytes); if(referer != (char*) 0) { head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Referer: %s\r\n", referer); } head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "User-Agent: %s\r\n", user_agent); if(multipart) { head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Content-type: multipart/form-data; boundary=\"%s\"\r\n", sep); } else { head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Content-type: application/x-www-form-urlencoded\r\n"); } head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Accept: */*\r\n"); head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Accept-Language: en\r\n"); head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Accept-Charset: iso-8859-1,*,utf-8\r\n"); if(auth_token != (char*) 0) { char token_buf[1000]; token_buf[b64_encode((unsigned char*) auth_token, strlen(auth_token), token_buf, sizeof(token_buf))] = '\0'; head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Authorization: Basic %s\r\n", token_buf); } for(i = 0; i < ncookies; ++i) { head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "Cookie: %s\r\n", cookies[i]); } if(header_name != (char*) 0) { head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "%s: %s\r\n", header_name, header_value); } head_bytes += snprintf(&head_buf[head_bytes], sizeof(head_buf) - head_bytes, "\r\n"); if(protocol == PROTO_HTTPS) { (void) SSL_write(ssl, head_buf, head_bytes); } else { (void) write(sockfd, head_buf, head_bytes); } if(protocol == PROTO_HTTPS) { (void) SSL_write(ssl, data_buf, data_bytes); } else { (void) write(sockfd, data_buf, data_bytes); } (void) alarm(timeout); header_state = HDST_BOL; for(;;) { if(protocol == PROTO_HTTPS) { head_bytes = SSL_read(ssl, head_buf, sizeof(head_buf)); } else { head_bytes = read(sockfd, head_buf, sizeof(head_buf)); } if(head_bytes <= 0) { break; } for(i = 0; i < head_bytes; ++i) { if(verbose) { (void) write(1, &head_buf[i], 1); } switch(header_state) { case HDST_BOL: switch(head_buf[i]) { case '\n': header_state = HDST_LF; break; case '\r': header_state = HDST_CR; break; default: header_state = HDST_TEXT; break; } break; case HDST_TEXT: switch(head_buf[i]) { case '\n': header_state = HDST_LF; break; case '\r': header_state = HDST_CR; break; } break; case HDST_LF: switch(head_buf[i]) { case '\n': goto end_of_headers; case '\r': header_state = HDST_CR; break; default: header_state = HDST_TEXT; break; } break; case HDST_CR: switch(head_buf[i]) { case '\n': header_state = HDST_CRLF; break; case '\r': goto end_of_headers; default: header_state = HDST_TEXT; break; } break; case HDST_CRLF: switch(head_buf[i]) { case '\n': goto end_of_headers; case '\r': header_state = HDST_CRLFCR; break; default: header_state = HDST_TEXT; break; } break; case HDST_CRLFCR: switch(head_buf[i]) { case '\n': case '\r': goto end_of_headers; default: header_state = HDST_TEXT; break; } break; } } } end_of_headers: if(head_bytes > 0) { ++i; (void) write(1, &head_buf[i], head_bytes - i); } for(;;) { (void) alarm(timeout); if(protocol == PROTO_HTTPS) { head_bytes = SSL_read(ssl, head_buf, sizeof(head_buf)); } else { head_bytes = read(sockfd, head_buf, sizeof(head_buf)); } if(head_bytes == 0) { break; } if(head_bytes < 0) { show_error("read"); } (void) write(1, head_buf, head_bytes); } if(protocol == PROTO_HTTPS) { SSL_free(ssl); SSL_CTX_free(ssl_ctx); } (void) close(sockfd); } void show_error(char* cause) { char buf[5000]; (void) sprintf(buf, "%s: %s - %s", argv0, url, cause); perror(buf); exit(1); } void sigcatch(int sig) { (void) fprintf(stderr, "%s: %s - timed out\n", argv0, url); exit(1); } void strencode(char* to, char* from) { int tolen; for(tolen = 0; *from != '\0'; ++from) { if(isalnum(*from) || strchr("/_.", *from) != (char*) 0) { *to = *from; ++to; ++tolen; } else { (void) sprintf(to, "%%%02x", (int) *from & 0xff); to += 3; tolen += 3; } } *to = '\0'; } void usage() { (void) fprintf(stderr, "usage: %s [-c cookie] [-t timeout] [-r referer] [-u user-agent] [-a username:password] [-h header value] [-v] url\n", argv0); exit(1); }