/********************************************************************\ * BitlBee -- An IRC to other IM-networks gateway * * * * Copyright 2002-2013 Wilmer van der Gaast and others * \********************************************************************/ /* HTTP(S) module */ /* This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License with the Debian GNU/Linux distribution in /usr/share/common-licenses/GPL; if not, write to the Free Software Foundation, Inc., 51 Franklin St., Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include "http_client.h" #include "url.h" #include "sock.h" static gboolean http_connected(gpointer data, int source, b_input_condition cond); static gboolean http_ssl_connected(gpointer data, int returncode, void *source, b_input_condition cond); static gboolean http_incoming_data(gpointer data, int source, b_input_condition cond); static void http_free(struct http_request *req); struct http_request *http_dorequest(char *host, int port, int ssl, char *request, http_input_function func, gpointer data) { struct http_request *req; int error = 0; req = g_new0(struct http_request, 1); if (ssl) { req->ssl = ssl_connect(host, port, TRUE, http_ssl_connected, req); if (req->ssl == NULL) { error = 1; } } else { req->fd = proxy_connect(host, port, http_connected, req); if (req->fd < 0) { error = 1; } } if (error) { http_free(req); return NULL; } req->func = func; req->data = data; req->request = g_strdup(request); req->request_length = strlen(request); req->redir_ttl = 3; req->content_length = -1; if (getenv("BITLBEE_DEBUG")) { printf("About to send HTTP request:\n%s\n", req->request); } return req; } struct http_request *http_dorequest_url(char *url_string, http_input_function func, gpointer data) { url_t *url = g_new0(url_t, 1); char *request; void *ret; if (!url_set(url, url_string)) { g_free(url); return NULL; } if (url->proto != PROTO_HTTP && url->proto != PROTO_HTTPS) { g_free(url); return NULL; } request = g_strdup_printf("GET %s HTTP/1.0\r\n" "Host: %s\r\n" "User-Agent: BitlBee " BITLBEE_VERSION "\r\n" "\r\n", url->file, url->host); ret = http_dorequest(url->host, url->port, url->proto == PROTO_HTTPS, request, func, data); g_free(url); g_free(request); return ret; } /* This one is actually pretty simple... Might get more calls if we can't write the whole request at once. */ static gboolean http_connected(gpointer data, int source, b_input_condition cond) { struct http_request *req = data; int st; if (source < 0) { goto error; } if (req->inpa > 0) { b_event_remove(req->inpa); } sock_make_nonblocking(req->fd); if (req->ssl) { st = ssl_write(req->ssl, req->request + req->bytes_written, req->request_length - req->bytes_written); if (st < 0) { if (ssl_errno != SSL_AGAIN) { ssl_disconnect(req->ssl); goto error; } } } else { st = write(source, req->request + req->bytes_written, req->request_length - req->bytes_written); if (st < 0) { if (!sockerr_again()) { closesocket(req->fd); goto error; } } } if (st > 0) { req->bytes_written += st; } if (req->bytes_written < req->request_length) { req->inpa = b_input_add(source, req->ssl ? ssl_getdirection(req->ssl) : B_EV_IO_WRITE, http_connected, req); } else { req->inpa = b_input_add(source, B_EV_IO_READ, http_incoming_data, req); } return FALSE; error: if (req->status_string == NULL) { req->status_string = g_strdup("Error while writing HTTP request"); } if (req->func != NULL) { req->func(req); } http_free(req); return FALSE; } static gboolean http_ssl_connected(gpointer data, int returncode, void *source, b_input_condition cond) { struct http_request *req = data; if (source == NULL) { if (returncode != 0) { char *err = ssl_verify_strerror(returncode); req->status_string = g_strdup_printf( "Certificate verification problem 0x%x: %s", returncode, err ? err : "Unknown"); g_free(err); } return http_connected(data, -1, cond); } req->fd = ssl_getfd(source); return http_connected(data, req->fd, cond); } typedef enum { CR_OK, CR_EOF, CR_ERROR, CR_ABORT, } http_ret_t; static gboolean http_handle_headers(struct http_request *req); static http_ret_t http_process_chunked_data(struct http_request *req, const char *buffer, int len); static http_ret_t http_process_data(struct http_request *req, const char *buffer, int len); static gboolean http_incoming_data(gpointer data, int source, b_input_condition cond) { struct http_request *req = data; char buffer[4096]; int st; if (req->inpa > 0) { b_event_remove(req->inpa); req->inpa = 0; } if (req->ssl) { st = ssl_read(req->ssl, buffer, sizeof(buffer)); if (st < 0) { if (ssl_errno != SSL_AGAIN) { /* goto cleanup; */ /* YAY! We have to deal with crappy Microsoft servers that LOVE to send invalid TLS packets that abort connections! \o/ */ goto eof; } } else if (st == 0) { goto eof; } } else { st = read(req->fd, buffer, sizeof(buffer)); if (st < 0) { if (!sockerr_again()) { req->status_string = g_strdup(strerror(errno)); goto cleanup; } } else if (st == 0) { goto eof; } } if (st > 0) { http_ret_t c; if (req->flags & HTTPC_CHUNKED) { c = http_process_chunked_data(req, buffer, st); } else { c = http_process_data(req, buffer, st); } if (c == CR_EOF) { goto eof; } else if (c == CR_ERROR || c == CR_ABORT) { return FALSE; } } if (req->content_length != -1 && req->body_size >= req->content_length) { goto eof; } if (ssl_pending(req->ssl)) { return http_incoming_data(data, source, cond); } /* There will be more! */ req->inpa = b_input_add(req->fd, req->ssl ? ssl_getdirection(req->ssl) : B_EV_IO_READ, http_incoming_data, req); return FALSE; eof: req->flags |= HTTPC_EOF; /* Maybe if the webserver is overloaded, or when there's bad SSL support... */ if (req->bytes_read == 0) { req->status_string = g_strdup("Empty HTTP reply"); goto cleanup; } cleanup: /* Avoid g_source_remove warnings */ req->inpa = 0; if (req->ssl) { ssl_disconnect(req->ssl); } else { closesocket(req->fd); } if (req->body_size < req->content_length) { req->status_code = -1; g_free(req->status_string); req->status_string = g_strdup("Response truncated"); } if (getenv("BITLBEE_DEBUG") && req) { printf("Finishing HTTP request with status: %s\n", req->status_string ? req->status_string : "NULL"); } if (req->func != NULL) { req->func(req); } http_free(req); return FALSE; } static http_ret_t http_process_chunked_data(struct http_request *req, const char *buffer, int len) { char *chunk, *eos, *s; if (len < 0) { return TRUE; } if (len > 0) { req->cbuf = g_realloc(req->cbuf, req->cblen + len + 1); memcpy(req->cbuf + req->cblen, buffer, len); req->cblen += len; req->cbuf[req->cblen] = '\0'; } /* Turns out writing a proper chunked-encoding state machine is not that simple. :-( I've tested this one feeding it byte by byte so I hope it's solid now. */ chunk = req->cbuf; eos = req->cbuf + req->cblen; while (TRUE) { int clen = 0; /* Might be a \r\n from the last chunk. */ s = chunk; while (g_ascii_isspace(*s)) { s++; } /* Chunk length. Might be incomplete. */ if (s < eos && sscanf(s, "%x", &clen) != 1) { return CR_ERROR; } while (g_ascii_isxdigit(*s)) { s++; } /* If we read anything here, it *must* be \r\n. */ if (strncmp(s, "\r\n", MIN(2, eos - s)) != 0) { return CR_ERROR; } s += 2; if (s >= eos) { break; } /* 0-length chunk means end of response. */ if (clen == 0) { return CR_EOF; } /* Wait for the whole chunk to arrive. */ if (s + clen > eos) { break; } if (http_process_data(req, s, clen) != CR_OK) { return CR_ABORT; } chunk = s + clen; } if (chunk != req->cbuf) { req->cblen = eos - chunk; s = g_memdup(chunk, req->cblen + 1); g_free(req->cbuf); req->cbuf = s; } return CR_OK; } static http_ret_t http_process_data(struct http_request *req, const char *buffer, int len) { if (len <= 0) { return CR_OK; } if (!req->reply_body) { req->reply_headers = g_realloc(req->reply_headers, req->bytes_read + len + 1); memcpy(req->reply_headers + req->bytes_read, buffer, len); req->bytes_read += len; req->reply_headers[req->bytes_read] = '\0'; if (strstr(req->reply_headers, "\r\n\r\n") || strstr(req->reply_headers, "\n\n")) { /* We've now received all headers. Look for something interesting. */ if (!http_handle_headers(req)) { return CR_ABORT; } /* Start parsing the body as chunked if required. */ if (req->flags & HTTPC_CHUNKED) { return http_process_chunked_data(req, NULL, 0); } } } else { int pos = req->reply_body - req->sbuf; req->sbuf = g_realloc(req->sbuf, req->sblen + len + 1); memcpy(req->sbuf + req->sblen, buffer, len); req->bytes_read += len; req->sblen += len; req->sbuf[req->sblen] = '\0'; req->reply_body = req->sbuf + pos; req->body_size = req->sblen - pos; } if ((req->flags & HTTPC_STREAMING) && req->reply_body && req->func != NULL) { req->func(req); } return CR_OK; } /* Splits headers and body. Checks result code, in case of 300s it'll handle redirects. If this returns FALSE, don't call any callbacks! */ static gboolean http_handle_headers(struct http_request *req) { char *end1, *end2, *s; int evil_server = 0; /* Zero termination is very convenient. */ req->reply_headers[req->bytes_read] = '\0'; /* Find the separation between headers and body, and keep stupid webservers in mind. */ end1 = strstr(req->reply_headers, "\r\n\r\n"); end2 = strstr(req->reply_headers, "\n\n"); if (end2 && end2 < end1) { end1 = end2 + 1; evil_server = 1; } else if (end1) { end1 += 2; } else { req->status_string = g_strdup("Malformed HTTP reply"); return TRUE; } *end1 = '\0'; if (getenv("BITLBEE_DEBUG")) { printf("HTTP response headers:\n%s\n", req->reply_headers); } if (evil_server) { req->reply_body = end1 + 1; } else { req->reply_body = end1 + 2; } /* Separately allocated space for headers and body. */ req->sblen = req->body_size = req->reply_headers + req->bytes_read - req->reply_body; req->sbuf = req->reply_body = g_memdup(req->reply_body, req->body_size + 1); req->reply_headers = g_realloc(req->reply_headers, end1 - req->reply_headers + 1); if ((end1 = strchr(req->reply_headers, ' ')) != NULL) { if (sscanf(end1 + 1, "%hd", &req->status_code) != 1) { req->status_string = g_strdup("Can't parse status code"); req->status_code = -1; } else { char *eol; if (evil_server) { eol = strchr(end1, '\n'); } else { eol = strchr(end1, '\r'); } req->status_string = g_strndup(end1 + 1, eol - end1 - 1); /* Just to be sure... */ if ((eol = strchr(req->status_string, '\r'))) { *eol = 0; } if ((eol = strchr(req->status_string, '\n'))) { *eol = 0; } } } else { req->status_string = g_strdup("Can't locate status code"); req->status_code = -1; } if (((req->status_code >= 301 && req->status_code <= 303) || req->status_code == 307) && req->redir_ttl-- > 0) { char *loc, *new_request, *new_host; int error = 0, new_port, new_proto; /* We might fill it again, so let's not leak any memory. */ g_free(req->status_string); req->status_string = NULL; loc = strstr(req->reply_headers, "\nLocation: "); if (loc == NULL) { /* We can't handle this redirect... */ req->status_string = g_strdup("Can't locate Location: header"); return TRUE; } loc += 11; while (*loc == ' ') { loc++; } /* TODO/FIXME: Possibly have to handle relative redirections, and rewrite Host: headers. Not necessary for now, it's enough for passport authentication like this. */ if (*loc == '/') { /* Just a different pathname... */ /* Since we don't cache the servername, and since we don't need this yet anyway, I won't implement it. */ req->status_string = g_strdup("Can't handle relative redirects"); return TRUE; } else { /* A whole URL */ url_t *url; char *s, *version, *headers; const char *new_method; s = strstr(loc, "\r\n"); if (s == NULL) { return TRUE; } url = g_new0(url_t, 1); *s = 0; if (!url_set(url, loc)) { req->status_string = g_strdup("Malformed redirect URL"); g_free(url); return TRUE; } /* Find all headers and, if necessary, the POST request contents. Skip the old Host: header though. This crappy code here means anything using this http_client MUST put the Host: header at the top. */ if (!((s = strstr(req->request, "\r\nHost: ")) && (s = strstr(s + strlen("\r\nHost: "), "\r\n")))) { req->status_string = g_strdup("Error while rebuilding request string"); g_free(url); return TRUE; } headers = s; /* More or less HTTP/1.0 compliant, from my reading of RFC 2616. Always perform a GET request unless we received a 301. 303 was meant for this but it's HTTP/1.1-only and we're specifically speaking HTTP/1.0. ... Well except someone at identi.ca's didn't bother reading any RFCs and just return HTTP/1.1-specific status codes to HTTP/1.0 requests. Fuckers. So here we are, handle 301..303,307. */ if (strncmp(req->request, "GET", 3) == 0) { /* GETs never become POSTs. */ new_method = "GET"; } else if (req->status_code == 302 || req->status_code == 303) { /* 302 de-facto becomes GET, 303 as specified by RFC 2616#10.3.3 */ new_method = "GET"; } else { /* 301 de-facto should stay POST, 307 specifally RFC 2616#10.3.8 */ new_method = "POST"; } if ((version = strstr(req->request, " HTTP/")) && (s = strstr(version, "\r\n"))) { version++; version = g_strndup(version, s - version); } else { version = g_strdup("HTTP/1.0"); } /* Okay, this isn't fun! We have to rebuild the request... :-( */ new_request = g_strdup_printf("%s %s %s\r\nHost: %s%s", new_method, url->file, version, url->host, headers); new_host = g_strdup(url->host); new_port = url->port; new_proto = url->proto; /* If we went from POST to GET, truncate the request content. */ if (new_request[0] != req->request[0] && new_request[0] == 'G' && (s = strstr(new_request, "\r\n\r\n"))) { s[4] = '\0'; } g_free(url); g_free(version); } if (req->ssl) { ssl_disconnect(req->ssl); } else { closesocket(req->fd); } req->fd = -1; req->ssl = NULL; if (getenv("BITLBEE_DEBUG")) { printf("New headers for redirected HTTP request:\n%s\n", new_request); } if (new_proto == PROTO_HTTPS) { req->ssl = ssl_connect(new_host, new_port, TRUE, http_ssl_connected, req); if (req->ssl == NULL) { error = 1; } } else { req->fd = proxy_connect(new_host, new_port, http_connected, req); if (req->fd < 0) { error = 1; } } g_free(new_host); if (error) { req->status_string = g_strdup("Connection problem during redirect"); g_free(new_request); return TRUE; } g_free(req->request); g_free(req->reply_headers); g_free(req->sbuf); req->request = new_request; req->request_length = strlen(new_request); req->bytes_read = req->bytes_written = req->inpa = 0; req->reply_headers = req->reply_body = NULL; req->sbuf = req->cbuf = NULL; req->sblen = req->cblen = 0; return FALSE; } if ((s = get_rfc822_header(req->reply_headers, "Content-Length", 0)) && sscanf(s, "%d", &req->content_length) != 1) { req->content_length = -1; } g_free(s); if ((s = get_rfc822_header(req->reply_headers, "Transfer-Encoding", 0))) { if (strcasestr(s, "chunked")) { req->flags |= HTTPC_CHUNKED; req->cbuf = req->sbuf; req->cblen = req->sblen; req->reply_body = req->sbuf = g_strdup(""); req->body_size = req->sblen = 0; } g_free(s); } return TRUE; } void http_flush_bytes(struct http_request *req, size_t len) { if (len <= 0 || len > req->body_size || !(req->flags & HTTPC_STREAMING)) { return; } req->reply_body += len; req->body_size -= len; if (req->reply_body - req->sbuf >= 512) { char *new = g_memdup(req->reply_body, req->body_size + 1); g_free(req->sbuf); req->reply_body = req->sbuf = new; req->sblen = req->body_size; } } void http_close(struct http_request *req) { if (!req) { return; } if (req->inpa > 0) { b_event_remove(req->inpa); } if (req->ssl) { ssl_disconnect(req->ssl); } else { proxy_disconnect(req->fd); } http_free(req); } static void http_free(struct http_request *req) { g_free(req->request); g_free(req->reply_headers); g_free(req->status_string); g_free(req->sbuf); g_free(req->cbuf); g_free(req); }