#!/usr/bin/python # -*- coding: utf-8 -*- # # Copyright 2014 The Plaso Project Authors. # Please see the AUTHORS file for details on individual authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Parser for Windows IIS Log file. More documentation on fields can be found here: http://www.microsoft.com/technet/prodtechnol/WindowsServer2003/Library/ IIS/676400bc-8969-4aa7-851a-9319490a9bbb.mspx?mfr=true """ import logging import pyparsing from plaso.events import time_events from plaso.lib import eventdata from plaso.lib import timelib from plaso.parsers import manager from plaso.parsers import text_parser __author__ = 'Ashley Holtz (ashley.a.holtz@gmail.com)' class IISEventObject(time_events.TimestampEvent): """Convenience class to handle the IIS event object.""" DATA_TYPE = 'iis:log:line' def __init__(self, timestamp, structure): """Initializes the IIS event object. Args: timestamp: The timestamp time value, epoch. structure: The structure with any parsed log values to iterate over. """ super(IISEventObject, self).__init__( timestamp, eventdata.EventTimestamp.WRITTEN_TIME) for key, value in structure.iteritems(): if key in ('time', 'date'): continue if value == u'-': continue if type(value) is pyparsing.ParseResults: setattr(self, key, u''.join(value)) else: try: save_value = int(value, 10) except ValueError: save_value = value setattr(self, key, save_value) class WinIISParser(text_parser.PyparsingSingleLineTextParser): """Parses a Microsoft IIS log file.""" NAME = 'winiis' DESCRIPTION = u'Parser for Microsoft IIS log files.' # Common Fields (6.0: date time s-sitename s-ip cs-method cs-uri-stem # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status # sc-substatus sc-win32-status. # Common Fields (7.5): date time s-ip cs-method cs-uri-stem cs-uri-query # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus # sc-win32-status time-taken # Define common structures. BLANK = pyparsing.Literal(u'-') WORD = pyparsing.Word(pyparsing.alphanums + u'-') | BLANK INT = pyparsing.Word(pyparsing.nums, min=1) | BLANK IP = ( text_parser.PyparsingConstants.IPV4_ADDRESS | text_parser.PyparsingConstants.IPV6_ADDRESS | BLANK) PORT = pyparsing.Word(pyparsing.nums, min=1, max=6) | BLANK URI = pyparsing.Word(pyparsing.alphanums + u'/.?&+;_=()-:,%') | BLANK # Define how a log line should look like for version 6.0. LOG_LINE_6_0 = ( text_parser.PyparsingConstants.DATE.setResultsName('date') + text_parser.PyparsingConstants.TIME.setResultsName('time') + WORD.setResultsName('s_sitename') + IP.setResultsName('dest_ip') + WORD.setResultsName('http_method') + URI.setResultsName('cs_uri_stem') + URI.setResultsName('cs_uri_query') + PORT.setResultsName('dest_port') + WORD.setResultsName('cs_username') + IP.setResultsName('source_ip') + URI.setResultsName('user_agent') + INT.setResultsName('sc_status') + INT.setResultsName('sc_substatus') + INT.setResultsName('sc_win32_status')) _LOG_LINE_STRUCTURES = {} # Common fields. Set results name with underscores, not hyphens because regex # will not pick them up. _LOG_LINE_STRUCTURES['date'] = ( text_parser.PyparsingConstants.DATE.setResultsName('date')) _LOG_LINE_STRUCTURES['time'] = ( text_parser.PyparsingConstants.TIME.setResultsName('time')) _LOG_LINE_STRUCTURES['s-sitename'] = WORD.setResultsName('s_sitename') _LOG_LINE_STRUCTURES['s-ip'] = IP.setResultsName('dest_ip') _LOG_LINE_STRUCTURES['cs-method'] = WORD.setResultsName('http_method') _LOG_LINE_STRUCTURES['cs-uri-stem'] = URI.setResultsName('requested_uri_stem') _LOG_LINE_STRUCTURES['cs-uri-query'] = URI.setResultsName('cs_uri_query') _LOG_LINE_STRUCTURES['s-port'] = PORT.setResultsName('dest_port') _LOG_LINE_STRUCTURES['cs-username'] = WORD.setResultsName('cs_username') _LOG_LINE_STRUCTURES['c-ip'] = IP.setResultsName('source_ip') _LOG_LINE_STRUCTURES['cs(User-Agent)'] = URI.setResultsName('user_agent') _LOG_LINE_STRUCTURES['sc-status'] = INT.setResultsName('http_status') _LOG_LINE_STRUCTURES['sc-substatus'] = INT.setResultsName('sc_substatus') _LOG_LINE_STRUCTURES['sc-win32-status'] = ( INT.setResultsName('sc_win32_status')) # Less common fields. _LOG_LINE_STRUCTURES['s-computername'] = URI.setResultsName('s_computername') _LOG_LINE_STRUCTURES['sc-bytes'] = INT.setResultsName('sent_bytes') _LOG_LINE_STRUCTURES['cs-bytes'] = INT.setResultsName('received_bytes') _LOG_LINE_STRUCTURES['time-taken'] = INT.setResultsName('time_taken') _LOG_LINE_STRUCTURES['cs-version'] = WORD.setResultsName('protocol_version') _LOG_LINE_STRUCTURES['cs-host'] = WORD.setResultsName('cs_host') _LOG_LINE_STRUCTURES['cs(Cookie)'] = URI.setResultsName('cs_cookie') _LOG_LINE_STRUCTURES['cs(Referrer)'] = URI.setResultsName('cs_referrer') # Define the available log line structures. Default to the IIS v. 6.0 # common format. LINE_STRUCTURES = [ ('comment', text_parser.PyparsingConstants.COMMENT_LINE_HASH), ('logline', LOG_LINE_6_0)] # Define a signature value for the log file. SIGNATURE = '#Software: Microsoft Internet Information Services' def __init__(self): """Initializes a parser object.""" super(WinIISParser, self).__init__() self.version = None self.software = None def VerifyStructure(self, unused_parser_context, line): """Verify that this file is an IIS log file. Args: parser_context: A parser context object (instance of ParserContext). line: A single line from the text file. Returns: True if this is the correct parser, False otherwise. """ # TODO: Examine other versions of the file format and if this parser should # support them. For now just checking if it contains the IIS header. if self.SIGNATURE in line: return True return False def ParseRecord(self, unused_parser_context, key, structure): """Parse each record structure and return an event object if applicable. Args: parser_context: A parser context object (instance of ParserContext). key: An identification string indicating the name of the parsed structure. structure: A pyparsing.ParseResults object from a line in the log file. Returns: An event object (instance of EventObject) or None. """ if key == 'comment': self._ParseCommentRecord(structure) elif key == 'logline': return self._ParseLogLine(structure) else: logging.warning( u'Unable to parse record, unknown structure: {0:s}'.format(key)) def _ParseCommentRecord(self, structure): """Parse a comment and store appropriate attributes.""" comment = structure[1] if comment.startswith(u'Version'): _, _, self.version = comment.partition(u':') elif comment.startswith(u'Software'): _, _, self.software = comment.partition(u':') elif comment.startswith(u'Date'): # TODO: fix this date is not used here. _, _, unused_date = comment.partition(u':') # Check if there's a Fields line. If not, LOG_LINE defaults to IIS 6.0 # common format. elif comment.startswith(u'Fields'): log_line = pyparsing.Empty() for member in comment[7:].split(): log_line += self._LOG_LINE_STRUCTURES.get(member, self.URI) # TODO: self._line_structures is a work-around and this needs # a structural fix. self._line_structures[1] = ('logline', log_line) def _ParseLogLine(self, structure): """Parse a single log line and return an EventObject.""" date = structure.get('date', None) time = structure.get('time', None) if not (date and time): logging.warning(( u'Unable to extract timestamp from IIS log line with structure: ' u'{0:s}.').format(structure)) return year, month, day = date hour, minute, second = time timestamp = timelib.Timestamp.FromTimeParts( year, month, day, hour, minute, second) if not timestamp: return return IISEventObject(timestamp, structure) manager.ParsersManager.RegisterParser(WinIISParser)