# Written by Bram Cohen
# Modified by Cameron Dale
# see LICENSE.txt for license information
#
# $Id: HTTPHandler.py 376 2008-05-05 01:33:30Z camrdale-guest $

"""Handles incoming HTTP connections from other clients to this server.

@type logger: C{logging.Logger}
@var logger: the logger to send all log messages to for this module
@type weekdays: C{list} of C{string}
@var weekdays: the days of the week
@type months: C{list} of C{string}
@var months: the months of the year

"""

from cStringIO import StringIO
from sys import stdout, exc_info
import time
from clock import clock
from gzip import GzipFile
import signal, os, logging

# Logger shared by everything in this module
logger = logging.getLogger('DebTorrent.HTTPHandler')

# Abbreviated day names, Monday first
weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Abbreviated month names; the leading None lets the 1-based month number
# from time.localtime() be used directly as the index
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Version string written in the status line of DEBTORRENT responses
DEBTORRENT_PROTOCOL = "0.2"

def isotime(secs = None):
    """Create an ISO formatted string of the time.
    
    @type secs: C{float}
    @param secs: number of seconds since the epoch 
        (optional, default is to use the current time)
    @rtype: C{string}
    @return: the time in UTC, formatted as 'YYYY-MM-DD HH:MM UTC'
    
    """
    
    # Identity test against the None singleton: only a missing argument
    # selects the current time, an explicit 0 (the epoch) is used as given.
    if secs is None:
        secs = time.time()
    return time.strftime('%Y-%m-%d %H:%M UTC', time.gmtime(secs))

class HTTPRequest:
    """One request received on an HTTP connection, with its pending answer.
    
    Keeps the details of a GET or HEAD request together with the answer
    that has been saved for it, if any.
    
    @type header: C{string}
    @ivar header: the first header line received from the request
    @type command: C{string}
    @ivar command: the requested command ('GET' or 'HEAD')
    @type path: C{string}
    @ivar path: the requested path to get
    @type encoding: C{string}
    @ivar encoding: the encoding to use when sending the response
    @type headers: C{dictionary}
    @ivar headers: the headers received with the request
    @type answer: (C{int}, C{string}, C{dictionary}, C{string} or C{file})
    @ivar answer: the HTTP status code, status message, headers, and package
        data, or None if the answer is not yet available
    
    """
    
    def __init__(self, header, command, path, encoding, headers):
        """Store the details of the request, with no answer yet.
        
        @type header: C{string}
        @param header: the first header line received from the request
        @type command: C{string}
        @param command: the requested command ('GET' or 'HEAD')
        @type path: C{string}
        @param path: the requested path to get
        @type encoding: C{string}
        @param encoding: the encoding to use when sending the response
        @type headers: C{dictionary}
        @param headers: the headers received with the request
        
        """
        
        # Nothing has been saved for this request so far
        self.answer = None
        self.headers = headers
        self.encoding = encoding
        self.path = path
        self.command = command
        self.header = header
        
    def save_answer(self, r):
        """Remember an answer, unless a better one is already stored.
        
        An answer with a lower status code is better; when the codes are
        equal the newer answer wins.
        
        @type r: (C{int}, C{string}, C{dictionary}, C{string} or C{file})
        @param r: the HTTP status code, status message, headers, and package data
        
        """
        
        current = self.answer
        if not current:
            # First answer for this request, nothing to compare against
            self.answer = r
            return

        logger.error('An answer already exists for this request, keeping the better one')
        if r[0] <= current[0]:
            self.answer = r
        
    def has_answer(self):
        """Report whether an answer has been saved for the request.
        
        @rtype: C{boolean}
        @return: whether the answer is available yet
        
        """
        
        return bool(self.answer)

    def get_answer(self):
        """Return the answer saved for the request.
        
        @rtype: (C{int}, C{string}, C{dictionary}, C{string})
        @return: the HTTP status code, status message, headers, and package
            data, or None if the answer is not yet available
        
        """
        
        return self.answer

class HTTPConnection:
    """A single connection from an HTTP client.
    
    Reads the request lines arriving on the connection one at a time and
    sends back the answers. The connection accepts only one request unless
    the request allows it to persist (DEBTORRENT or HTTP/1.1 requests, or a
    keep-alive header, when the handler's protocol is at least HTTP/1.1).
    On a persistent connection the requests are queued as L{HTTPRequest}
    objects so that answers can be matched to them.
    
    @type handler: L{HTTPHandler}
    @ivar handler: the handler for all incoming HTTP connections
    @type connection: L{SocketHandler.SingleSocket}
    @ivar connection: the new connection that was created
    @type buf: C{string}
    @ivar buf: the buffered data received on the connection
    @type requests: C{list} of L{HTTPRequest}
    @ivar requests: the outstanding requests for paths
    @type protocol: C{string}
    @ivar protocol: the protocol used to make the request
    @type version: (C{int}, C{int})
    @ivar version: the protocol version of the request
    @type close_connection: C{boolean}
    @ivar close_connection: whether the connection will be closed after this
        request
    @type closed: C{boolean}
    @ivar closed: whether the connection has been closed
    @type done: C{boolean}
    @ivar done: whether the response has been sent (initialised to False and
        not changed anywhere in this class)
    @type donereading: C{boolean}
    @ivar donereading: whether the headers have been read
    @type req_count: C{int}
    @ivar req_count: the number of request lines received on the connection
    @type next_func: C{method}
    @ivar next_func: the next method to call to process data on this connection
    @type header: C{string}
    @ivar header: the first header line received from the request
    @type command: C{string}
    @ivar command: the requested command ('GET' or 'HEAD')
    @type path: C{string}
    @ivar path: the requested path to get
    @type headers: C{dictionary}
    @ivar headers: the headers received with the request, keyed by the
        lower-cased header name
    @type encoding: C{string}
    @ivar encoding: the encoding to use when sending the response
    
    """
    
    def __init__(self, handler, connection):
        """Initialize the instance.
        
        @type handler: L{HTTPHandler}
        @param handler: the handler for all incoming HTTP connections
        @type connection: L{SocketHandler.SingleSocket}
        @param connection: the new connection that was created
        
        """
        
        self.handler = handler
        self.connection = connection
        self.buf = ''
        self.requests = []
        self.protocol = ''
        self.version = None
        self.close_connection = True
        self.closed = False
        self.done = False
        self.donereading = False
        self.req_count = 0
        self.next_func = self.read_type

    def get_ip(self):
        """Get the IP address of the connection.
        
        @rtype: C{string}
        @return: the IP address
        
        """
        
        return self.connection.get_ip()

    def data_came_in(self, data):
        """Process data that came in on the connection.
        
        Processes the request lines one at a time by calling the L{next_func}
        method with the lines. Data that does not yet form a complete line
        stays in the buffer until more arrives.
        
        @type data: C{string}
        @param data: the data that came in
        @rtype: C{boolean}
        @return: whether to keep the connection open
        
        """
        
        if self.donereading or self.next_func is None:
            return True
        self.buf += data
        while True:
            try:
                i = self.buf.index('\n')
            except ValueError:
                # No complete line left, wait for more data
                return True
            val = self.buf[:i]
            self.buf = self.buf[i+1:]
            # Each line handler returns the handler for the next line
            self.next_func = self.next_func(val)
            if self.donereading:
                return True
            if self.next_func is None or self.closed:
                return False

    def read_type(self, data):
        """Process the first header line that came in.
        
        @type data: C{string}
        @param data: the line that came in
        @rtype: C{method}
        @return: the next method to call to process data on this connection,
            or None if the request line was not acceptable
        
        """
        
        self.req_count += 1
        self.header = data.strip()
        words = data.split()
        if len(words) == 3:
            # Must be HTTP 1.0 or greater
            self.command, self.path, version = words

            try:
                # Extract the protocol from the request
                self.protocol, base_version_number = version.split('/', 1)
            except ValueError:
                # There was no '/' separating the protocol from the version
                logger.error("Bad request protocol (%r)", version)
                return None
            
            if self.handler.protocol >= "HTTP/1.1":
                try:
                    # Extract the version number from the request
                    version_number = base_version_number.split(".")
                    if len(version_number) != 2:
                        logger.error("Bad request version (%r)", version)
                        return None
                    self.version = int(version_number[0]), int(version_number[1])
                except (ValueError, IndexError):
                    logger.error("Bad request version (%r)", version)
                    return None
                
                # Use persistent connections for DEBTORRENT/HTTP1.1
                if (self.protocol == "DEBTORRENT" or 
                    (self.protocol == "HTTP" and self.version >= (1, 1))):
                    self.close_connection = False
                    
            elif self.protocol != "HTTP":
                logger.error("Unsupported protocol (%r)", version)
                return None
            else:
                # The handler only speaks HTTP/1.0, whatever was requested
                self.version = (1, 0)
            
        elif len(words) == 2:
            # Old HTTP 0.9 connections don't include the version and only support GET
            self.command, self.path = words
            self.protocol = 'HTTP'
            self.version = (0, 9)
            if self.command != 'GET':
                logger.warning('connection closed, improper command: %s',
                               self.command)
                return None
        else:
            logger.warning('connection closed, corrupt header line: %s', data)
            return None
        
        if self.command not in ('HEAD', 'GET'):
            logger.warning('connection closed, improper command: %s',
                           self.command)
            return None
        
        logger.info('%s: %s %s', self.req_count, self.protocol, self.header)
        self.headers = {}
        return self.read_header

    def read_header(self, data):
        """Process the next header line that came in.
        
        A blank line ends the headers, at which point the request is passed
        to the handler's C{getfunc} and any answer it returns is sent or
        queued.
        
        @type data: C{string}
        @param data: the line that came in
        @rtype: C{method}
        @return: the next method to call to process data on this connection
        
        """
        
        data = data.strip()
        
        # A blank line indicates the headers are done
        if data == '':
            # Get the encoding to use for the answer
            if self.headers.get('accept-encoding','').find('gzip') > -1:
                self.encoding = 'gzip'
            else:
                self.encoding = 'identity'
                
            # Check for persistent connection headers. Header names are
            # stored lower-cased, so the lookup must be lower-case too, and
            # the value may be a comma separated list of options.
            conntokens = [token.strip() for token in
                          self.headers.get('connection', '').lower().split(',')]
            if 'close' in conntokens:
                self.close_connection = True
            elif ('keep-alive' in conntokens and
                  self.handler.protocol >= "HTTP/1.1"):
                self.close_connection = False

            # If this is not the last request
            newrequest = None
            if not self.close_connection or self.requests:
                newrequest = HTTPRequest(self.header, self.command, self.path,
                                         self.encoding, self.headers)
                self.requests.append(newrequest)

            # Call the function to process the request
            r = self.handler.getfunc(self, self.path, self.headers, newrequest)

            # Send the answer if available
            if r is not None:
                if newrequest:
                    # Multiple requests, so queue it for possible sending
                    self.answer(r, newrequest)
                else:
                    # It's the only request, so just send it
                    self.send_answer(r, self.header, self.command, self.path,
                                     self.encoding, self.headers)
                
            # Request complete, close or wait for more
            if self.close_connection:
                self.donereading = True
                return None
            else:
                # Assume the next request is the last until it says otherwise
                self.close_connection = True
                return self.read_type
            
        # Process the header line
        try:
            i = data.index(':')
        except ValueError:
            logger.warning('connection closed, corrupt header: %s', data)
            return None
        name = data[:i].strip()
        value = data[i+1:].strip()
        self.headers[name.lower()] = value
        logger.debug('%s: %s', name, value)
        return self.read_header

    def answer(self, r, httpreq):
        """Add a response to the queued responses and check if any are ready to send.
        
        Answers for DEBTORRENT requests are sent immediately. Answers for
        other requests are saved with the request and sent once every
        request queued before it has been answered. Answers with the codes
        102 and 103 are only ever sent to DEBTORRENT clients of version 0.2
        or later.
        
        @type r: (C{int}, C{string}, C{dictionary}, C{string} or C{file})
        @param r: the HTTP status code, status message, headers, and package data
        @type httpreq: L{HTTPRequest}
        @param httpreq: the request the answer is for, or None if the
            request was not queued
        
        """
        
        if self.closed:
            logger.warning('connection closed before anwswer, dropping data')
            return
        
        if not self.requests:
            if httpreq is None:
                # There's only one request allowed, so send the answer
                if (r[0] != 102 and r[0] != 103) or (
                            self.protocol == "DEBTORRENT" and 
                            self.version >= (0, 2)):
                    self.send_answer(r, self.header, self.command, self.path,
                                     self.encoding, self.headers)
            else:
                logger.error('got answer for unknown request')
            return

        if httpreq:
            if httpreq not in self.requests:
                logger.error('Got an answer for an unknown request')
            else:
                if self.protocol == "DEBTORRENT":
                    # DEBTORRENT requests get sent immediately
                    if r[0] != 102:
                        # A 102 leaves the request waiting for a later answer
                        self.requests.remove(httpreq)
                    if r[0] != 102 or (self.version >= (0, 2)):
                        self.send_answer(r, httpreq.header, httpreq.command,
                                         httpreq.path, httpreq.encoding,
                                         httpreq.headers)
                else:
                    if r[0] != 102:
                        httpreq.save_answer(r)
        elif r[0] == 103 and self.protocol == "DEBTORRENT" and self.version >= (0, 2):
            self.send_answer(r, 'GET None DEBTORRENT/0.2', 'GET', 'None', 'identity', {})
            

        # Answer all possible requests
        while self.requests and self.requests[0].has_answer():
            httpreq = self.requests.pop(0)
            r = httpreq.get_answer()
            self.send_answer(r, httpreq.header, httpreq.command, httpreq.path,
                             httpreq.encoding, httpreq.headers)

    def send_answer(self, response, header, command, path, encoding, req_headers):
        """Send out the complete request.
        
        @type response: (C{int}, C{string}, C{dictionary}, C{string} or C{file})
        @param response: the response code, the response string, the
            headers to send with the response (modified in place) and the
            data to send with the response
        @type header: C{string}
        @param header: the first header line received from the request
        @type command: C{string}
        @param command: the requested command ('GET' or 'HEAD')
        @type path: C{string}
        @param path: the requested path to get
        @type encoding: C{string}
        @param encoding: the encoding to use when sending the response
        @type req_headers: C{dictionary}
        @param req_headers: the headers received with the request
        
        """

        # Unpacked here rather than in the signature, which works the same
        # for callers and is valid syntax on every Python version
        responsecode, responsestring, headers, data = response

        # Encode the response data
        if encoding == 'gzip' and isinstance(data, str):
            compressed = StringIO()
            gz = GzipFile(fileobj = compressed, mode = 'wb', compresslevel = 9)
            gz.write(data)
            gz.close()
            cdata = compressed.getvalue()
            if len(cdata) >= len(data):
                # Compression didn't help, send the data as it is
                encoding = 'identity'
            else:
                logger.debug('Compressed: %s  Uncompressed: %s',
                             len(cdata), len(data))
                data = cdata
                headers['Content-Encoding'] = 'gzip'

        # Determine the length of the data to be written
        if isinstance(data, str):
            size = len(data)
        else:
            # Only what remains after the file's current position is sent
            size = os.fstat(data.fileno())[6] - data.tell()

        # Check for a local header and other qualifications
        if (req_headers.get('if-local-nocache', '').lower() == 'true' and
            self.protocol == "DEBTORRENT" and isinstance(data, file) and
            responsecode == 200 and self.connection.get_ip().startswith('127.')):
            # Send the No Content response with the file name as header
            # NOTE(review): Content-Length below still carries the size of
            # the file although no body is sent -- confirm that DEBTORRENT
            # clients rely on this before changing it.
            responsecode = 204
            responsestring = "No Content"
            headers['NoCache-File'] = data.name
            data.close()
            data = None
        
        # i'm abusing the identd field here, but this should be ok
        if encoding == 'identity':
            ident = '-'
        else:
            # The encoding of the request being answered, which on a
            # pipelined connection can differ from the latest one received
            ident = encoding
            
        if responsecode >= 200:
            self.handler.write_log( self.connection.get_ip(), ident, '-',
                                    header, responsecode, size,
                                    req_headers.get('referer', '-'),
                                    req_headers.get('user-agent', '-') )
    
            logger.info('sending response: %s %s %s (%s bytes)%r',
                        self.protocol, responsecode, responsestring, size,
                        headers)
        
        r = StringIO()
        
        # Write the header line
        if self.protocol == "HTTP":
            r.write(self.handler.protocol + ' ' + str(responsecode) + ' ' + 
                    responsestring + '\r\n')
        elif self.protocol == "DEBTORRENT":
            r.write('DEBTORRENT/'+DEBTORRENT_PROTOCOL+' '+path+' '+
                    str(responsecode)+' '+responsestring+'\r\n')
            
        # Write the individual headers (HTTP 0.9 responses have none)
        if self.version >= (1, 0) or self.protocol != 'HTTP':
            headers['Content-Length'] = size
            for key, value in headers.items():
                r.write(key + ': ' + str(value) + '\r\n')
            r.write('\r\n')
            
        self.connection.write(r.getvalue())
    
        # Don't write the body if only the headers are requested
        if command != 'HEAD' and data:
            self.connection.write(data)
            
    def close(self):
        """Close the connection and drop all pending requests/answers."""
        logger.debug('HTTP connection closed')
        self.closed = True
        del self.connection
        self.next_func = None
        for httpreq in self.requests:
            if httpreq.has_answer():
                logger.debug('Connection lost before answer could be sent: %s',
                             httpreq.path)
        del self.requests[:]
        
        
class HTTPHandler:
    """The handler for all new and existing HTTP connections.
    
    Supports HTTP/1.1 persistent connections with pipelining if the protocol
    is set to 'HTTP/1.1'.
    
    @type connections: C{dictionary}
    @ivar connections: all the existing connections, keys are the connection 
        objects, values are the L{HTTPConnection} objects
    @type getfunc: C{method}
    @ivar getfunc: the method to call with the processed GET requests
    @type minflush: C{float}
    @ivar minflush: the minimum amount of time between flushing the log
    @type lastflush: C{float}
    @ivar lastflush: the time of the last log flush
    @type logfile: C{string}
    @ivar logfile: the file name to write the logs to
    @type log: C{file}
    @ivar log: the file to write the logs to, or None to write them to
        standard output
    @type protocol: C{string}
    @ivar protocol: the HTTP protocol version to use
    
    """
    
    def __init__(self, getfunc, minflush, logfile = None, hupmonitor = None,
                 protocol = 'HTTP/1.0'):
        """Initialize the instance.
        
        @type getfunc: C{method}
        @param getfunc: the method to call with the processed GET requests
        @type minflush: C{float}
        @param minflush: the minimum amount of time between flushing the log
        @type logfile: C{string}
        @param logfile: the file to write the logs to
            (optional, defaults to standard output)
        @type hupmonitor: C{boolean}
        @param hupmonitor: whether to reopen the log file on a HUP signal,
            ignored where the platform has no HUP signal
            (optional, default is False)
        @type protocol: C{string}
        @param protocol: the HTTP protocol version to use
            (optional, defaults to HTTP/1.0)
        
        """
        
        self.connections = {}
        self.getfunc = getfunc
        self.minflush = minflush
        self.lastflush = clock()
        self.logfile = None
        self.log = None
        self.protocol = protocol
        if (logfile) and (logfile != '-'):
            try:
                self.logfile = logfile
                self.log = open(self.logfile,'a')
                self.log.write("# Log Started:  " + isotime() + "\n")
            except Exception:
                # Best effort: carry on without the file, but don't swallow
                # KeyboardInterrupt or SystemExit
                logger.exception('could not open log file')
            else:
                if hupmonitor and hasattr(signal, 'SIGHUP'):
                    def huphandler(signum, frame, self = self):
                        oldlog = self.log
                        # Log to standard output if the reopen fails, rather
                        # than keep writing to a closed file
                        self.log = None
                        try:
                            if oldlog is not None:
                                oldlog.close()
                            self.log = open(self.logfile,'a')
                            self.log.write("# Log reopened:  " + isotime() + "\n")
                        except Exception:
                            logger.exception('could not reopen log file')
                     
                    signal.signal(signal.SIGHUP, huphandler)            

    def external_connection_made(self, connection):
        """Create a new HTTPConnection object.
        
        @type connection: L{SocketHandler.SingleSocket}
        @param connection: the connection
        
        """
        
        logger.debug('new external connection')
        self.connections[connection] = HTTPConnection(self, connection)

    def connection_flushed(self, connection):
        """Flush a connection.
        
        Shuts the connection down if its L{HTTPConnection} is marked as done.
        
        @type connection: L{SocketHandler.SingleSocket}
        @param connection: the connection
        
        """
        
        logger.debug('connection flushed')
        if self.connections[connection].done:
            logger.debug('connection shutdown')
            connection.shutdown(1)

    def connection_lost(self, connection):
        """Remove the lost connection from the existing ones.
        
        @type connection: L{SocketHandler.SingleSocket}
        @param connection: the connection
        
        """
        
        logger.debug('connection lost')
        ec = self.connections[connection]
        ec.close()
        del self.connections[connection]

    def data_came_in(self, connection, data):
        """Process incoming data on an existing connection.
        
        The connection is shut down if its L{HTTPConnection} reports that it
        should not be kept open and it is not already closed.
        
        @type connection: L{SocketHandler.SingleSocket}
        @param connection: the connection
        @type data: C{string}
        @param data: the data that came in
        
        """
        
        c = self.connections[connection]
        if not c.data_came_in(data) and not c.closed:
            logger.debug('closing connection')
            c.connection.shutdown(1)

    def write_log(self, ip, ident, username, header,
            responsecode, length, referrer, useragent):
        """Print a log message.
        
        The message goes to the log file, or to standard output if there is
        none, and the output is flushed when more than L{minflush} has
        passed since the last flush.
        
        @type ip: C{string}
        @param ip: the IP address of the connection
        @type ident: C{string}
        @param ident: the type of connection
        @type username: C{string}
        @param username: the username from the client
        @type header: C{string}
        @param header: the first header line of the request
        @type responsecode: C{int}
        @param responsecode: the HTTP status code of the response
        @type length: C{int}
        @param length: the number of bytes in the data returned
        @type referrer: C{string}
        @param referrer: the referer for the incoming request
        @type useragent: C{string}
        @param useragent: the client's useragent used to make the request
        
        """
        
        # Imported here to get at the current standard output, which is what
        # a print statement without a file would write to
        import sys
        
        year, month, day, hour, minute, second = time.localtime(time.time())[:6]
        out = self.log
        if out is None:
            out = sys.stdout
        out.write('%s %s %s [%02d/%3s/%04d:%02d:%02d:%02d] "%s" %i %i "%s" "%s"\n' % (
            ip, ident, username, day, months[month], year, hour,
            minute, second, header, responsecode, length, referrer, useragent))
        t = clock()
        if t - self.lastflush > self.minflush:
            self.lastflush = t
            # Flush whatever was just written to
            out.flush()
