""" @author Georg Hopp """ import re from Message import Message class Parser(object): def __init__(self): self._header_exp = re.compile(r"([^:]+):(.+)\r\n") self._chunk_exp = re.compile(r"([\da-f]+).*\r\n") self._req_exp = re.compile( r".*(%s) +([^ ]+) +(HTTP/\d\.\d)\r\n"%'|'.join(Message.METHODS)) self._state_exp = re.compile(r".*(HTTP/\d\.\d) *(\d{3}) *(.*)\r\n") def parse(self, message, data): """ Parse data into this message. Returns 0 when the Message is already complete or the amount of the successfully parsed data. @message: An HttpMessage instance where the data is parsed into. @data: The data to be parsed. """ end = 0 if 0 == message.state: if message.isRequest() or message.isResponse(): message.reset() end += self.parseStartLine(message, data) if message.startlineReady() and not message.headersReady(): end += self.parseHeaders(message, data[end:]) if message.headersReady() and not message.bodyReady(): end += self.parseBody(message, data[end:]) return end def parseStartLine(self, message, data): """ Parse data into the HTTP message startline, either a Request- or a Statusline. This will set the message start_line if the given data matches the start_exp expression. In that case it will also set the start_ready flag. Returns the position of the data that is not parsed. @message: An HttpMessage instance where the data is parsed into. @data: The data to be parsed. """ end = 0 match = self._parseRequest(message, data) if match: end = match.end() match = self._parseResponse(message, data) if match: end = match.end() if 0 != end: message.state |= Message.START_READY else: end = self._checkInvalid(message, data[end:]) return end def parseHeaders(self, message, data): """ Parse data into the headers of a message. Returns the position of the data that is not parsed. @message: An HttpMessage instance where the data is parsed into. @data: The data to be parsed. """ end = 0 match = self._header_exp.match(data[end:]) while match and "\r\n" != data[end:end+2]: message.setHeader(match.group(1).strip(), match.group(2).strip()) end += match.end() match = self._header_exp.match(data[end:]) if "\r\n" == data[end:end+2]: # a single \r\n at the beginning indicates end of headers. if message.headerKeyExists('Content-Length'): message._chunk_size = int(message.getHeader('Content-Length')) elif message.headerKeyExists('Transfer-Encoding') and \ 'chunked' in message.getHeader('Transfer-Encoding'): message._chunked = True else: message.state |= Message.BODY_READY message.state |= Message.HEADERS_READY end += 2 else: end += self._checkInvalid(message, data[end:]) return end def parseBody(self, message, data): """ Parse data into the body of a message. This is also capable of handling chunked bodies as defined for HTTP/1.1. Returns the position of the data that is not parsed. @message: An HttpMessage instance where the data is parsed into. @data: The data to be parsed. """ readlen = 0 if message._chunked and 0 == message._chunk_size: match = self._chunk_exp.match(data) if match is None: return 0 message._chunk_size = int(match.group(1), 16) readlen += match.end() data = data[match.end():] if 0 == self._chunk_size: message.state |= Message.BODY_READY return readlen + 2 available_data = len(data[0:message._chunk_size]) message._chunk_size -= available_data readlen += available_data message._body += data[0:available_data] if 0 == message._chunk_size: if not message._chunked: message.state |= Message.BODY_READY return readlen else: readlen += 2 return readlen def _parseRequest(self, message, data): match = self._req_exp.search(data) if match: message._method = Message.METHODS.index(match.group(1)) message._uri = match.group(2) message._http = match.group(3) return match def _parseResponse(self, message, data): match = self._state_exp.search(data) if match: message._http = match.group(1) message._code = int(match.group(2)) message._message = match.group(3) return match def _checkInvalid(self, message, data): end = 0 nl = data.find("\r\n") if -1 != nl: # We received an invalid message...ignore it and start again # TODO This should be logged. message.reset() end = nl + 2 return end # vim: set ft=python et ts=8 sw=4 sts=4: