You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
169 lines
5.4 KiB
169 lines
5.4 KiB
"""
|
|
@author Georg Hopp
|
|
|
|
"""
|
|
|
|
import re
|
|
from Message import Message
|
|
|
|
class Parser(object):
    """
    Incremental parser that fills a Message from raw HTTP data.

    The parser itself only holds the compiled regular expressions; all
    parsing progress (state flags, remaining chunk size, body, ...) is
    stored on the message object handed to each method.
    """

    def __init__(self):
        """
        Compile the regular expressions used by the parse methods.
        """
        # "Name: value" header line.
        self._header_exp = re.compile(r"([^:]+):(.+)\r\n")
        # Chunk-size line of a chunked body. The size is hexadecimal and
        # hex digits are case-insensitive, so upper-case digits must be
        # accepted as well; anything after the size on that line is ignored.
        self._chunk_exp = re.compile(r"([\da-fA-F]+).*\r\n")
        # Request line: one of Message.METHODS, the URI and the HTTP version.
        self._req_exp = re.compile(
            r".*(%s) +([^ ]+) +(HTTP/\d\.\d)\r\n" % '|'.join(Message.METHODS))
        # Status line: HTTP version, three digit code and reason phrase.
        self._state_exp = re.compile(r".*(HTTP/\d\.\d) *(\d{3}) *(.*)\r\n")

    def parse(self, message, data):
        """
        Parse data into this message.

        Runs the start line, header and body stages one after another, each
        on the data the previous stage left over, as far as the message's
        readiness flags allow.

        Returns 0 when the Message is already complete or the amount of the
        successfully parsed data.

        @message: An HttpMessage instance where the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        if 0 == message.state:
            # No state flag is set: if the message still holds data of a
            # previous request/response, clear it before starting over.
            if message.isRequest() or message.isResponse():
                message.reset()
            end += self.parseStartLine(message, data)

        if message.startlineReady() and not message.headersReady():
            end += self.parseHeaders(message, data[end:])

        if message.headersReady() and not message.bodyReady():
            end += self.parseBody(message, data[end:])

        return end

    def parseStartLine(self, message, data):
        """
        Parse data into the HTTP message startline, either a Request- or a
        Statusline. If data contains one of them the corresponding fields
        of the message are filled and Message.START_READY is set in the
        message state. Otherwise the data is checked for an invalid line.

        Returns the position of the data that is not parsed.

        @message: An HttpMessage instance where the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        # Both forms are tried; if both match, the status line's end wins.
        match = self._parseRequest(message, data)
        if match:
            end = match.end()

        match = self._parseResponse(message, data)
        if match:
            end = match.end()

        if 0 != end:
            message.state |= Message.START_READY
        else:
            end = self._checkInvalid(message, data)

        return end

    def parseHeaders(self, message, data):
        """
        Parse data into the headers of a message.

        Header lines are consumed until the empty line that ends the header
        section. At that point the headers decide how the body is read:
        Content-Length sets the amount of body data to read, a
        Transfer-Encoding containing 'chunked' enables chunked reading, and
        without either the body is marked as ready right away.

        Returns the position of the data that is not parsed.

        @message: An HttpMessage instance where the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        match = self._header_exp.match(data[end:])
        while match and "\r\n" != data[end:end + 2]:
            message.setHeader(match.group(1).strip(), match.group(2).strip())
            end += match.end()
            match = self._header_exp.match(data[end:])

        if "\r\n" == data[end:end + 2]:
            # a single \r\n at the beginning indicates end of headers.
            if message.headerKeyExists('Content-Length'):
                message._chunk_size = int(message.getHeader('Content-Length'))
            elif message.headerKeyExists('Transfer-Encoding') and \
                    'chunked' in message.getHeader('Transfer-Encoding'):
                message._chunked = True
            else:
                message.state |= Message.BODY_READY

            message.state |= Message.HEADERS_READY
            end += 2
        else:
            # Neither a header line nor the end of the headers: either the
            # line is incomplete (nothing is consumed) or it is invalid.
            end += self._checkInvalid(message, data[end:])

        return end

    def parseBody(self, message, data):
        """
        Parse data into the body of a message. This is also capable of
        handling chunked bodies as defined for HTTP/1.1.

        message._chunk_size holds the amount of body data still expected,
        either of the whole body or of the current chunk. A chunk of size 0
        ends a chunked body.

        Returns the position of the data that is not parsed.

        @message: An HttpMessage instance where the data is parsed into.
        @data:    The data to be parsed.
        """
        readlen = 0

        if message._chunked and 0 == message._chunk_size:
            # A new chunk starts here, read its chunk-size line first.
            match = self._chunk_exp.match(data)

            if match is None:
                # The chunk-size line is not (completely) available.
                return 0

            message._chunk_size = int(match.group(1), 16)
            readlen += match.end()
            data = data[match.end():]

            if 0 == message._chunk_size:
                # Last chunk. The additional 2 stands for the \r\n after it.
                # NOTE(review): that \r\n is counted without checking that
                # it is present in data; trailers are not handled.
                message.state |= Message.BODY_READY
                return readlen + 2

        # Take as much as is available, but not more than still expected.
        available_data = len(data[0:message._chunk_size])
        message._chunk_size -= available_data
        readlen += available_data
        message._body += data[0:available_data]

        if 0 == message._chunk_size:
            if not message._chunked:
                message.state |= Message.BODY_READY
                return readlen
            else:
                # Skip the \r\n that terminates the chunk data.
                # NOTE(review): counted even if it was not received yet.
                readlen += 2

        return readlen

    def _parseRequest(self, message, data):
        """
        Search data for a request line and store method (as index into
        Message.METHODS), URI and HTTP version in the message.

        Returns the match object or None if no request line was found.
        """
        match = self._req_exp.search(data)
        if match:
            message._method = Message.METHODS.index(match.group(1))
            message._uri = match.group(2)
            message._http = match.group(3)
        return match

    def _parseResponse(self, message, data):
        """
        Search data for a status line and store HTTP version, status code
        (as int) and the remaining text of the line in the message.

        Returns the match object or None if no status line was found.
        """
        match = self._state_exp.search(data)
        if match:
            message._http = match.group(1)
            message._code = int(match.group(2))
            message._message = match.group(3)
        return match

    def _checkInvalid(self, message, data):
        """
        Handle data that could not be parsed.

        If data contains a complete line (terminated by \\r\\n) the message
        is reset and the line is skipped. Without a line end nothing is
        consumed.

        Returns the position behind the skipped line or 0.
        """
        end = 0
        nl = data.find("\r\n")
        if -1 != nl:
            # We received an invalid message...ignore it and start again
            # TODO This should be logged.
            message.reset()
            end = nl + 2
        return end
|
|
|
|
# vim: set ft=python et ts=8 sw=4 sts=4:
|