From cb75a749e948a90ce5141f0f80891ab338dbb356 Mon Sep 17 00:00:00 2001 From: Georg Hopp Date: Thu, 9 Feb 2012 22:34:32 +0100 Subject: [PATCH] start split of request parser --- ChangeLog | 6 +- include/http/header.h | 18 + include/http/request.h | 19 +- include/http/response.h | 23 ++ src/Makefile.am | 5 +- src/http/header.c | 69 ++++ src/http/header/get.c | 41 +++ src/http/header/sort.c | 19 ++ src/http/request.c | 6 +- src/http/request/parser.c | 100 ++++++ src/http/request/parser/get_header.c | 18 + src/http/request/parser/get_request_line.c | 29 ++ src/http/request/parser/parse.c | 150 ++++++++ src/http/{request_queue.c => request/queue.c} | 0 src/http/request_parser.c | 323 ------------------ 15 files changed, 487 insertions(+), 339 deletions(-) create mode 100644 include/http/header.h create mode 100644 include/http/response.h create mode 100644 src/http/header.c create mode 100644 src/http/header/get.c create mode 100644 src/http/header/sort.c create mode 100644 src/http/request/parser.c create mode 100644 src/http/request/parser/get_header.c create mode 100644 src/http/request/parser/get_request_line.c create mode 100644 src/http/request/parser/parse.c rename src/http/{request_queue.c => request/queue.c} (100%) delete mode 100644 src/http/request_parser.c diff --git a/ChangeLog b/ChangeLog index 8715917..03a8b6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,10 @@ +2012-02-09 22:34:32 +0100 Georg Hopp + + * start split of request parser (HEAD, master) + 2012-02-09 11:44:17 +0100 Georg Hopp - * no more request body debig output (HEAD, master) + * no more request body debig output (origin/master, origin/HEAD) 2012-02-09 11:32:28 +0100 Georg Hopp diff --git a/include/http/header.h b/include/http/header.h new file mode 100644 index 0000000..0b9696c --- /dev/null +++ b/include/http/header.h @@ -0,0 +1,18 @@ +#ifndef __HTTP_HEADER_H__ +#define __HTTP_HEADER_H__ + +#include "class.h" + +CLASS(HttpHeader) { + unsigned long hash; + char * name; + char * value; +}; 
+ +HttpHeader httpHeaderParse(char * line); // @INFO: destructive +void httpHeaderSort(const HttpHeader [], int); +char * httpHeaderGet(const HttpHeader [], int, const char *); + +#endif // __HTTP_HEADER_H__ + +// vim: set ts=4 sw=4: diff --git a/include/http/request.h b/include/http/request.h index 22cb46c..09d864e 100644 --- a/include/http/request.h +++ b/include/http/request.h @@ -2,21 +2,18 @@ #define __HTTP_REQUEST_H__ #include "class.h" +#include "http/header.h" CLASS(HttpRequest) { - char * http_version; - char * uri; - char * method; + char * method; + char * uri; + char * version; - struct HttpRequestHeader { - unsigned long hash; - char * name; - char * value; - } header[128]; - int nheader; + HttpHeader header[128]; + int nheader; - char * body; - int nbody; + char * body; + int nbody; }; char * diff --git a/include/http/response.h b/include/http/response.h new file mode 100644 index 0000000..f708d37 --- /dev/null +++ b/include/http/response.h @@ -0,0 +1,23 @@ +#ifndef __HTTP_RESPONSE_H__ +#define __HTTP_RESPONSE_H__ + +#include "class.h" + +CLASS(HttpResponse) { + char * http_version; + char * status; + char * reson; + + HttpHeader header[128]; + int nheader; + + char * body; + int nbody; +}; + +char * +httpRequestHeaderGet(HttpRequest this, const char * name); + +#endif /* __HTTP_RESPONSE_H__ */ + +// vim: set ts=4 sw=4: diff --git a/src/Makefile.am b/src/Makefile.am index 003da48..b2f7eee 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -5,7 +5,10 @@ CLASS = class.c interface.c interface/class.c SOCKET = socket.c socket/accept.c socket/connect.c socket/listen.c SERVER = server.c server/run.c server/close_conn.c LOGGER = logger.c logger/stderr.c logger/syslog.c interface/logger.c -HTTP = interface/stream_reader.c http/request_parser.c http/request.c http/request_queue.c http/request/header_get.c +HTTP = interface/stream_reader.c http/request/parser.c http/request.c \ + http/request/queue.c http/header.c http/header/get.c \ + http/header/sort.c 
http/request/parser/get_header.c \ + http/request/parser/parse.c http/request/parser/get_request_line.c AM_CFLAGS = -Wall -I ../include/ diff --git a/src/http/header.c b/src/http/header.c new file mode 100644 index 0000000..f16e5fb --- /dev/null +++ b/src/http/header.c @@ -0,0 +1,69 @@ +#include +#include +#include + +#include "class.h" +#include "interface/class.h" + +#include "http/header.h" + +/** + * SDBM hashing algorithm: + * + * this algorithm was created for sdbm (a public-domain reimplementation of + * ndbm) database library. it was found to do well in scrambling bits, + * causing better distribution of the keys and fewer splits. it also happens + * to be a good general hashing function with good distribution. the actual + * function is hash(i) = hash(i - 1) * 65599 + str[i]; what is included below + * is the faster version used in gawk. [there is even a faster, duff-device + * version] the magic constant 65599 was picked out of thin air while + * experimenting with different constants, and turns out to be a prime. this + * is one of the algorithms used in berkeley db (see sleepycat) and elsewhere. 
+ */
+static
+inline
+unsigned long
+sdbm(unsigned char * str)
+{
+	unsigned long hash = 0;
+	int c;
+
+	while ((c = tolower(*str++)))
+		hash = c + (hash << 6) + (hash << 16) - hash;
+
+	return hash;
+}
+
+static
+void
+ctor(void * _this, va_list * params) {
+	HttpHeader this = _this;
+	char * name;
+	char * value;
+
+	name = va_arg(* params, char *);
+	value = va_arg(* params, char *);
+
+	this->name = malloc(strlen(name) + 1);
+	strcpy(this->name, name);
+
+	this->hash = sdbm((unsigned char *)name);
+
+	this->value = malloc(strlen(value) + 1);
+	strcpy(this->value, value);
+}
+
+static
+void
+dtor(void * _this)
+{
+	HttpHeader this = _this;
+
+	free(this->name);
+	free(this->value);
+}
+
+INIT_IFACE(Class, ctor, dtor, NULL);
+CREATE_CLASS(HttpHeader, NULL, IFACE(Class));
+
+// vim: set ts=4 sw=4:
diff --git a/src/http/header/get.c b/src/http/header/get.c
new file mode 100644
index 0000000..f46b47f
--- /dev/null
+++ b/src/http/header/get.c
@@ -0,0 +1,70 @@
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "http/header.h"
+
+/**
+ * Case-insensitive SDBM hash of a NUL terminated string: every byte goes
+ * through tolower() before it is mixed into the hash.
+ */
+static
+inline
+unsigned long
+sdbm(const unsigned char * str)
+{
+	unsigned long hash = 0;
+	int c;
+
+	while ((c = tolower(*str++)))
+		hash = c + (hash << 6) + (hash << 16) - hash;
+
+	return hash;
+}
+
+/**
+ * bsearch() comparator: _a points to the hash that is searched for, _b to
+ * an array element, i.e. to an HttpHeader handle.
+ */
+static
+inline
+int
+comp (const void * _a, const void * _b)
+{
+	unsigned long a = *(const unsigned long *)_a;
+	const HttpHeader b = *(const HttpHeader *)_b;
+	return (a < b->hash)? -1 : (a > b->hash)? 1 : 0;
+}
+
+/**
+ * Look up the value of the header called name. Names are compared through
+ * their case-insensitive hash only.
+ *
+ * The first nheader entries of header have to be ordered by hash (see
+ * httpHeaderSort), bsearch() depends on that.
+ *
+ * Returns the value of the matching header, or NULL if header or name is
+ * NULL, nheader is not positive or no entry matches.
+ */
+char *
+httpHeaderGet(const HttpHeader header[], int nheader, const char * name)
+{
+	unsigned long hash;
+	const HttpHeader * found;
+
+	if (NULL == header || NULL == name || 0 >= nheader) {
+		return NULL;
+	}
+
+	hash = sdbm((const unsigned char *)name);
+
+	/*
+	 * bsearch() returns a pointer to the matching array ELEMENT. The
+	 * elements are HttpHeader handles, so the result has to be
+	 * dereferenced once more to reach the header itself.
+	 */
+	found = bsearch(&hash, header, nheader, sizeof(HttpHeader), comp);
+
+	return (NULL != found)? (*found)->value : NULL;
+}
+
+// vim: set ts=4 sw=4:
diff --git a/src/http/header/sort.c b/src/http/header/sort.c
new file mode 100644
index 0000000..4c1ded9
--- /dev/null
+++ b/src/http/header/sort.c
@@ -0,0 +1,37 @@
+#include <stdlib.h>
+
+#include "http/header.h"
+
+/**
+ * qsort() comparator: orders two array elements (HttpHeader handles) by
+ * ascending hash.
+ */
+static
+inline
+int
+comp (const void * _a, const void * _b)
+{
+	const HttpHeader a = *(const HttpHeader *)_a;
+	const HttpHeader b = *(const HttpHeader *)_b;
+	return (a->hash < b->hash)? -1 : (a->hash > b->hash)? 1 : 0;
+}
+
+/**
+ * Sort the first nheader entries of header in place by ascending hash,
+ * which is the order the binary search in httpHeaderGet() needs.
+ */
+void
+httpHeaderSort(const HttpHeader header[], int nheader)
+{
+	if (NULL == header || 2 > nheader) {
+		return;
+	}
+
+	/*
+	 * The prototype declares the handles const, but qsort() reorders
+	 * them, so the qualifier has to be cast away here.
+	 */
+	qsort((void *)header, nheader, sizeof(HttpHeader), comp);
+}
+
+// vim: set ts=4 sw=4:
diff --git a/src/http/request.c b/src/http/request.c
index b593b60..39ce73e 100644
--- a/src/http/request.c
+++ b/src/http/request.c
@@ -26,13 +26,13 @@ dtor(void * _this)
 	HttpRequest this = _this;
 	int i;
 
-	_free((void **)&(this->http_version));
+	_free((void **)&(this->version));
 	_free((void **)&(this->uri));
 	_free((void **)&(this->method));
 
 	for (i=0; i<128; i++) {
-		_free((void **)&((this->header)[i].name));
-		_free((void **)&((this->header)[i].value));
+		if (NULL == (this->header)[i]) break;
+		delete(&(this->header)[i]);
 	}
 
 	_free((void **)&(this->body));
diff --git a/src/http/request/parser.c b/src/http/request/parser.c
new file mode 100644
index 0000000..1d147e9
--- /dev/null
+++ b/src/http/request/parser.c
@@ -0,0 +1,100 @@
+#include
+#include
+#include
+
+#include "class.h"
+#include "http/request_parser.h"
+#include "interface/class.h"
+#include "interface/stream_reader.h"
+#include "http/request.h"
+#include "http/request_queue.h"
+
+void httpRequestParserParse(HttpRequestParser);
+
+static
+void
+ctor(void * _this, va_list * params)
+{
+	HttpRequestParser this = _this;
+
+	this->request_queue = new(HttpRequestQueue);
+
+	this->buffer = malloc(HTTP_REQUEST_PARSER_READ_CHUNK);
+	this->buffer[0] = 0;
+}
+
+static
+void
+dtor(void * _this)
+{
+	HttpRequestParser this = _this;
+
+	free(this->buffer);
+	delete(&(this->request_queue));
+}
+
+static
+void
+_clone(void * _this, void * _base) +{ + HttpRequestParser this = _this; + HttpRequestParser base = _base; + size_t chunks; + + /** + * every parser has its own queue... + */ + this->request_queue = new(HttpRequestQueue); + this->buffer_used = base->buffer_used; + + chunks = this->buffer_used / HTTP_REQUEST_PARSER_READ_CHUNK; + chunks++; + + this->buffer = malloc(chunks * HTTP_REQUEST_PARSER_READ_CHUNK); + memcpy(this->buffer, base->buffer, this->buffer_used); +} + +static +size_t +get_data(void * _this, int fd) +{ + HttpRequestParser this = _this; + size_t remaining, chunks; + char buffer[1024]; + + size_t size = read(fd, buffer, 1024); + + if (0 < size) { + remaining = this->buffer_used % HTTP_REQUEST_PARSER_READ_CHUNK; + chunks = this->buffer_used / HTTP_REQUEST_PARSER_READ_CHUNK; + + /** + * because a division always rounds down + * chunks holds exactly the currently allocated chunks if + * remaining equals 0 but there is no space left. + * Else chunks holds the actually allocated amount of chunks + * minus 1. + * For this reason chunks always has to be increased by 1. 
+		 */
+		chunks++;
+
+		if (size >= remaining) {
+			this->buffer =
+				realloc(this->buffer, chunks * HTTP_REQUEST_PARSER_READ_CHUNK);
+		}
+
+		memcpy(this->buffer + this->buffer_used, buffer, size);
+		this->buffer_used += size;
+		this->buffer[this->buffer_used] = 0;
+
+		httpRequestParserParse(this);
+	}
+
+	return size;
+}
+
+INIT_IFACE(Class, ctor, dtor, _clone);
+INIT_IFACE(StreamReader, get_data);
+CREATE_CLASS(HttpRequestParser, NULL, IFACE(Class), IFACE(StreamReader));
+
+// vim: set ts=4 sw=4:
diff --git a/src/http/request/parser/get_header.c b/src/http/request/parser/get_header.c
new file mode 100644
index 0000000..eb1cab8
--- /dev/null
+++ b/src/http/request/parser/get_header.c
@@ -0,0 +1,50 @@
+#include <string.h>
+
+#include "class.h"
+#include "interface/class.h"
+#include "http/header.h"
+#include "http/request.h"
+
+/**
+ * Split one "Name: value" header line and append it to request.
+ *
+ * line is modified in place: the ':' is overwritten with a NUL so the name
+ * becomes a string of its own, and blanks in front of the value are
+ * skipped. name and value are handed to new(HttpHeader, ...).
+ *
+ * A line without ':' is ignored, as is any header once request->header is
+ * full.
+ */
+void
+httpRequestParserGetHeader(HttpRequest request, char * line)
+{
+	char * name = line;
+	char * value;
+
+	if (NULL == request || NULL == line) {
+		return;
+	}
+
+	value = strchr(line, ':');
+	if (NULL == value) {
+		return;
+	}
+
+	// never write past the end of the fixed size header table
+	if ((size_t)request->nheader >=
+			sizeof(request->header) / sizeof((request->header)[0])) {
+		return;
+	}
+
+	/*
+	 * Terminate the name AND step past the separator. Without the
+	 * increment value stays on the NUL just written and every header
+	 * ends up with an empty value.
+	 */
+	*value++ = 0;
+	for (; *value == ' '; value++);
+
+	(request->header)[request->nheader++] = new(HttpHeader, name, value);
+}
+
+// vim: set ts=4 sw=4:
diff --git a/src/http/request/parser/get_request_line.c b/src/http/request/parser/get_request_line.c
new file mode 100644
index 0000000..8f87525
--- /dev/null
+++ b/src/http/request/parser/get_request_line.c
@@ -0,0 +1,65 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "http/request.h"
+
+
+/**
+ * Return a malloc()ed copy of str, or NULL if the allocation fails.
+ */
+static
+char *
+copyToken(const char * str)
+{
+	size_t size = strlen(str) + 1;
+	char * copy = malloc(size);
+
+	if (NULL != copy) {
+		memcpy(copy, str, size);
+	}
+
+	return copy;
+}
+
+/**
+ * Split a request line ("METHOD URI VERSION") and store copies of the
+ * three parts in request->method, request->uri and request->version.
+ *
+ * line is modified in place: the blank after the method and the blank
+ * after the uri are overwritten with NUL. Additional blanks between the
+ * parts are skipped.
+ *
+ * If one of the two separating blanks is missing nothing is stored.
+ */
+void
+httpRequestParserGetRequestLine(HttpRequest request, char * line)
+{
+	char * method, * uri, * version;
+
+	if (NULL == request || NULL == line) {
+		return;
+	}
+
+	method = line;
+
+	uri = strchr(line, ' ');
+	if (NULL == uri) {
+		return;
+	}
+	*uri++ = 0;
+	for (; *uri == ' '; uri++);
+
+	version = strchr(uri, ' ');
+	if (NULL == version) {
+		return;
+	}
+	*version++ = 0;
+	for (; *version == ' '; version++);
+
+	request->method = copyToken(method);
+	request->uri = copyToken(uri);
+	// copy the version token itself, not the method
+	request->version = copyToken(version);
+}
+
+// vim: set ts=4 sw=4:
diff --git 
a/src/http/request/parser/parse.c b/src/http/request/parser/parse.c
new file mode 100644
index 0000000..6878679
--- /dev/null
+++ b/src/http/request/parser/parse.c
@@ -0,0 +1,198 @@
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "http/header.h"
+#include "http/request.h"
+#include "http/request_parser.h"
+#include "interface/class.h"
+
+
+/* bytes between done and the end of the data held in the parser buffer */
+#define REMAINS(pars,done) \
+	((pars)->buffer_used - ((done) - (pars)->buffer))
+
+
+/**
+ * Cut the next line out of *data.
+ *
+ * Returns the start of the line, NUL terminated where its "\r\n" began,
+ * and moves *data behind the "\r\n". Returns NULL and leaves *data
+ * untouched while no complete line is available.
+ */
+static
+inline
+char *
+httpRequestParserGetLine(char ** data)
+{
+	char * line_end = strstr(*data, "\r\n");
+	char * ret = *data;
+
+	if (NULL == line_end) {
+		return NULL;
+	}
+
+	*line_end = 0;
+	*data = line_end + 2;
+
+	return ret;
+}
+
+/**
+ * Move *data forward to the first alphabetic character, stopping at the
+ * terminating NUL.
+ */
+static
+inline
+void
+httpRequestSkip(char ** data)
+{
+	// <ctype.h> functions need an unsigned char value
+	for (; 0 != **data && ! isalpha((unsigned char)**data); (*data)++);
+}
+
+void httpRequestParserGetRequestLine(HttpRequest, char *);
+void httpRequestParserGetHeader(HttpRequest, char *);
+
+/**
+ * Run the request state machine over the data collected in this->buffer.
+ *
+ * Every completely received request is appended to this->request_queue
+ * and its bytes are removed from the buffer. The function returns when
+ * the buffer is empty or when the next line or body is not complete yet;
+ * a later call continues where this one stopped.
+ *
+ * NOTE(review): the request under construction and the read position are
+ * function level statics and therefore shared by all parser instances --
+ * presumably they belong into the parser object; confirm against the
+ * callers.
+ */
+void
+httpRequestParserParse(HttpRequestParser this)
+{
+	static HttpRequest request = NULL;
+	/*
+	 * The read position is remembered as an offset, not as a pointer,
+	 * because this->buffer may be moved by realloc() between two calls.
+	 */
+	static size_t offset = 0;
+	char * data = this->buffer + offset;
+	char * line;
+	int cont = 1;
+
+	while(cont) {
+		switch(this->state) {
+			case HTTP_REQUEST_GARBAGE:
+				data = this->buffer;
+				httpRequestSkip(&data);
+				request = new(HttpRequest);
+
+				this->state = HTTP_REQUEST_START;
+				break;
+
+			case HTTP_REQUEST_START:
+				if (NULL == (line = httpRequestParserGetLine(&data))) {
+					cont = 0;
+					break;
+				}
+
+				httpRequestParserGetRequestLine(request, line);
+
+				this->state = HTTP_REQUEST_REQUEST_LINE_DONE;
+				break;
+
+			case HTTP_REQUEST_REQUEST_LINE_DONE:
+				if (NULL == (line = httpRequestParserGetLine(&data))) {
+					cont = 0;
+					break;
+				}
+
+				if (0 == strlen(line)) {
+					this->state = HTTP_REQUEST_HEADERS_DONE;
+					break;
+				}
+
+				httpRequestParserGetHeader(request, line);
+				break;
+
+			case HTTP_REQUEST_HEADERS_DONE:
+				httpHeaderSort(request->header, request->nheader);
+
+				{
+					char * nbody;
+
+					if (0 == request->nbody) {
+						long length;
+
+						nbody = httpHeaderGet(
+								request->header,
+								request->nheader,
+								"Content-Length");
+
+						if (NULL == nbody) {
+							this->state = HTTP_REQUEST_DONE;
+							break;
+						}
+
+						// a negative or oversized length counts as "no body"
+						length = strtol(nbody, NULL, 10);
+						request->nbody =
+							(0 > length || INT_MAX <= length)? 0 : (int)length;
+					}
+
+					if (REMAINS(this, data) >= request->nbody) {
+						request->body = calloc(1, request->nbody + 1);
+						memcpy(request->body, data, request->nbody);
+						data += request->nbody;
+						this->state = HTTP_REQUEST_DONE;
+					}
+					else {
+						// body not complete yet, wait for more input
+						cont = 0;
+					}
+				}
+
+				break;
+
+			case HTTP_REQUEST_DONE:
+				/**
+				 * enqueue current request
+				 */
+				this->request_queue->requests[(this->request_queue->nrequests)++] =
+					request;
+
+				/**
+				 * remove processed stuff from input buffer, including the
+				 * terminating NUL that the line parsing relies on.
+				 */
+				memmove(this->buffer, data, REMAINS(this, data) + 1);
+
+				this->buffer_used -= data - this->buffer;
+				data = this->buffer;
+
+				/**
+				 * dont continue loop if input buffer is empty
+				 */
+				if (0 == this->buffer_used) {
+					cont = 0;
+				}
+
+				/**
+				 * prepare for next request
+				 */
+				this->state = HTTP_REQUEST_GARBAGE;
+
+				break;
+
+			default:
+				// unknown state: stop instead of spinning forever
+				cont = 0;
+				break;
+		}
+	}
+
+	offset = data - this->buffer;
+}
+
+// vim: set ts=4 sw=4:
diff --git a/src/http/request_queue.c b/src/http/request/queue.c
similarity index 100%
rename from src/http/request_queue.c
rename to src/http/request/queue.c
diff --git a/src/http/request_parser.c b/src/http/request_parser.c
deleted file mode 100644
index 2976c09..0000000
--- a/src/http/request_parser.c
+++ /dev/null
@@ -1,323 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "class.h"
-#include "http/request_parser.h"
-#include "interface/class.h"
-#include "interface/stream_reader.h"
-#include "http/request.h"
-#include "http/request_queue.h"
-
-static
-void
-httpRequestParserParse(HttpRequestParser);
-
-static
-void
-ctor(void * _this, va_list * params)
-{
-	HttpRequestParser this = _this;
-
-	this->request_queue = new(HttpRequestQueue);
-
-	this->buffer = malloc(HTTP_REQUEST_PARSER_READ_CHUNK);
-	this->buffer[0] = 0;
-}
-
-static
-void
-dtor(void * _this)
-{
-	HttpRequestParser this = _this;
-
-	free(this->buffer);
-	delete(&(this->request_queue));
-}
-
-static
-void
-_clone(void * _this, void * _base)
-{
-	HttpRequestParser this = _this;
-	HttpRequestParser base = _base;
-	size_t chunks;
-
-	/**
-	 * 
every parser has its own queue... - */ - this->request_queue = new(HttpRequestQueue); - this->buffer_used = base->buffer_used; - - chunks = this->buffer_used / HTTP_REQUEST_PARSER_READ_CHUNK; - chunks++; - - this->buffer = malloc(chunks * HTTP_REQUEST_PARSER_READ_CHUNK); - memcpy(this->buffer, base->buffer, this->buffer_used); -} - -static -size_t -get_data(void * _this, int fd) -{ - HttpRequestParser this = _this; - size_t remaining, chunks; - char buffer[1024]; - - size_t size = read(fd, buffer, 1024); - - if (0 < size) { - remaining = this->buffer_used % HTTP_REQUEST_PARSER_READ_CHUNK; - chunks = this->buffer_used / HTTP_REQUEST_PARSER_READ_CHUNK; - - /** - * because a division always rounds down - * chunks holds exactly the currently allocated chunks if - * remaining equals 0 but there is no space left. - * Else chunks holds the actually allocated amount of chunks - * minus 1. - * For this reason chunks always has to be increased by 1. - */ - chunks++; - - if (size >= remaining) { - this->buffer = - realloc(this->buffer, chunks * HTTP_REQUEST_PARSER_READ_CHUNK); - } - - memcpy(this->buffer + this->buffer_used, buffer, size); - this->buffer_used += size; - this->buffer[this->buffer_used] = 0; - - httpRequestParserParse(this); - } - - return size; -} - -INIT_IFACE(Class, ctor, dtor, _clone); -INIT_IFACE(StreamReader, get_data); -CREATE_CLASS(HttpRequestParser, NULL, IFACE(Class), IFACE(StreamReader)); - -static -inline -char * -httpRequestLineGet(char ** data) -{ - char * line_end = strstr(*data, "\r\n"); - char * ret = *data; - - if (NULL == line_end) { - return NULL; - } - - *line_end = 0; - *data = line_end + 2; - - return ret; -} - -static -inline -void -httpRequestSkip(char ** data) -{ - for (; 0 != **data && ! isalpha(**data); (*data)++); -} - -/** - * SDBM hashing algorithm: - * - * this algorithm was created for sdbm (a public-domain reimplementation of - * ndbm) database library. 
it was found to do well in scrambling bits, - * causing better distribution of the keys and fewer splits. it also happens - * to be a good general hashing function with good distribution. the actual - * function is hash(i) = hash(i - 1) * 65599 + str[i]; what is included below - * is the faster version used in gawk. [there is even a faster, duff-device - * version] the magic constant 65599 was picked out of thin air while - * experimenting with different constants, and turns out to be a prime. this - * is one of the algorithms used in berkeley db (see sleepycat) and elsewhere. - */ -static -inline -unsigned long -sdbm(unsigned char * str) -{ - unsigned long hash = 0; - int c; - - while ((c = tolower(*str++))) - hash = c + (hash << 6) + (hash << 16) - hash; - - return hash; -} - -static -inline -int -comp (const void * _a, const void * _b) -{ - const struct HttpRequestHeader * a = _a; - const struct HttpRequestHeader * b = _b; - return (a->hash < b->hash)? -1 : (a->hash > b->hash)? 1 : 0; -} - -static -void -httpRequestParserParse(HttpRequestParser this) -{ - static HttpRequest request = NULL; - static char * data; // static pointer to unprocessed data - char * line; - int cont = 1; - static int header_idx; - - while(cont) { - switch(this->state) { - case HTTP_REQUEST_GARBAGE: - data = this->buffer; // initialize static pointer - httpRequestSkip(&data); - request = new(HttpRequest); - - this->state = HTTP_REQUEST_START; - break; - - case HTTP_REQUEST_START: - if (NULL == (line = httpRequestLineGet(&data))) { - cont = 0; - break; - } - - { - char * delim = strchr(line, ' '); - - if (NULL != delim) { - *delim = 0; - request->method = malloc(strlen(line) + 1); - strcpy(request->method, line); - line = delim + 1; - - for (; *line == ' ' && *line != 0; line++); - - if (0 != *line) { - delim = strchr(line, ' '); - - if (NULL != delim) { - *delim = 0; - request->uri = malloc(strlen(line) + 1); - strcpy(request->uri, line); - line = delim + 1; - - for (; *line == ' ' && 
*line != 0; line++); - - if (0 != *line) { - request->http_version = malloc(strlen(line) + 1); - strcpy(request->http_version, line); - } - } - } - } - } - - header_idx = 0; - this->state = HTTP_REQUEST_REQUEST_LINE_DONE; - break; - - case HTTP_REQUEST_REQUEST_LINE_DONE: - if (NULL == (line = httpRequestLineGet(&data))) { - cont = 0; - break; - } - - if (0 == strlen(line)) { - this->state = HTTP_REQUEST_HEADERS_DONE; - break; - } - - { - char * delim = strchr(line, ':'); - - *delim = 0; - (request->header)[header_idx].name = malloc(strlen(line) + 1); - strcpy((request->header)[header_idx].name, line); - (request->header)[header_idx].hash = sdbm((unsigned char *)line); - - line = delim + 1; - for (; *line == ' ' && *line != 0; line++); - - (request->header)[header_idx].value = malloc(strlen(line) + 1); - strcpy((request->header)[header_idx].value, line); - - header_idx++; - request->nheader++; - } - - break; - - case HTTP_REQUEST_HEADERS_DONE: - /** - * @TODO: here comes the body handling - */ - qsort( - request->header, - request->nheader, - sizeof(struct HttpRequestHeader), - comp); - - { - char * bodylen; - - bodylen = httpRequestHeaderGet(request, "Content-Length"); - - if (NULL != bodylen) { - request->nbody = atoi(bodylen); - request->body = calloc(1, request->nbody + 1); - memcpy(request->body, data, request->nbody); - data += request->nbody; - } - } - - this->state = HTTP_REQUEST_DONE; - break; - - case HTTP_REQUEST_DONE: - /** - * enqueue current request - */ - this->request_queue->requests[(this->request_queue->nrequests)++] = - request; - - /** - * remove processed stuff from input buffer. 
- */ - memmove(this->buffer, - data, - this->buffer_used - (data - this->buffer) + 1); - - this->buffer_used -= data - this->buffer; - - /** - * dont continue loop if input buffer is empty - */ - if (0 == this->buffer_used) { - cont = 0; - } - - /** - * prepare for next request - */ - this->state = HTTP_REQUEST_GARBAGE; - - break; - - default: - break; - } - } -} - -// vim: set ts=4 sw=4: