#!/usr/bin/python # -*- coding: utf-8 -*- # # Copyright 2013 The Plaso Project Authors. # Please see the AUTHORS file for details on individual authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Parser for Microsoft Internet Explorer (MSIE) Cache Files (CF).""" import logging import pymsiecf from plaso.events import time_events from plaso.lib import errors from plaso.lib import eventdata from plaso.lib import timelib from plaso.parsers import interface from plaso.parsers import manager if pymsiecf.get_version() < '20130317': raise ImportWarning(u'MsiecfParser requires at least pymsiecf 20130317.') class MsiecfUrlEvent(time_events.TimestampEvent): """Convenience class for an MSIECF URL event.""" DATA_TYPE = 'msiecf:url' def __init__( self, timestamp, timestamp_description, msiecf_item, recovered=False): """Initializes the event. Args: timestamp: The timestamp value. timestamp_desc: The usage string describing the timestamp. msiecf_item: The MSIECF item (pymsiecf.url). recovered: Boolean value to indicate the item was recovered, False by default. """ super(MsiecfUrlEvent, self).__init__(timestamp, timestamp_description) self.recovered = recovered self.offset = msiecf_item.offset self.url = msiecf_item.location self.number_of_hits = msiecf_item.number_of_hits self.cache_directory_index = msiecf_item.cache_directory_index self.filename = msiecf_item.filename self.cached_file_size = msiecf_item.cached_file_size if msiecf_item.type and msiecf_item.data: if msiecf_item.type == u'cache': if msiecf_item.data[:4] == 'HTTP': self.http_headers = msiecf_item.data[:-1] # TODO: parse data of other URL item type like history which requires # OLE VT parsing. class MsiecfParser(interface.BaseParser): """Parses MSIE Cache Files (MSIECF).""" NAME = 'msiecf' DESCRIPTION = u'Parser for MSIE Cache Files (MSIECF) also known as index.dat.' def _ParseUrl( self, parser_context, msiecf_item, file_entry=None, parser_chain=None, recovered=False): """Extract data from a MSIE Cache Files (MSIECF) URL item. Every item is stored as an event object, one for each timestamp. Args: parser_context: A parser context object (instance of ParserContext). msiecf_item: An item (pymsiecf.url). file_entry: optional file entry object (instance of dfvfs.FileEntry). The default is None. parser_chain: Optional string containing the parsing chain up to this point. The default is None. recovered: Boolean value to indicate the item was recovered, False by default. """ # The secondary timestamp can be stored in either UTC or local time # this is dependent on what the index.dat file is used for. # Either the file path of the location string can be used to distinguish # between the different type of files. primary_timestamp = timelib.Timestamp.FromFiletime( msiecf_item.get_primary_time_as_integer()) primary_timestamp_desc = 'Primary Time' # Need to convert the FILETIME to the internal timestamp here to # do the from localtime conversion. secondary_timestamp = timelib.Timestamp.FromFiletime( msiecf_item.get_secondary_time_as_integer()) secondary_timestamp_desc = 'Secondary Time' if msiecf_item.type: if msiecf_item.type == u'cache': primary_timestamp_desc = eventdata.EventTimestamp.ACCESS_TIME secondary_timestamp_desc = eventdata.EventTimestamp.MODIFICATION_TIME elif msiecf_item.type == u'cookie': primary_timestamp_desc = eventdata.EventTimestamp.ACCESS_TIME secondary_timestamp_desc = eventdata.EventTimestamp.MODIFICATION_TIME elif msiecf_item.type == u'history': primary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME secondary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME elif msiecf_item.type == u'history-daily': primary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME secondary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME # The secondary_timestamp is in localtime normalize it to be in UTC. secondary_timestamp = timelib.Timestamp.LocaltimeToUTC( secondary_timestamp, parser_context.timezone) elif msiecf_item.type == u'history-weekly': primary_timestamp_desc = eventdata.EventTimestamp.CREATION_TIME secondary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME # The secondary_timestamp is in localtime normalize it to be in UTC. secondary_timestamp = timelib.Timestamp.LocaltimeToUTC( secondary_timestamp, parser_context.timezone) event_object = MsiecfUrlEvent( primary_timestamp, primary_timestamp_desc, msiecf_item, recovered) parser_context.ProduceEvent( event_object, parser_chain=parser_chain, file_entry=file_entry) if secondary_timestamp > 0: event_object = MsiecfUrlEvent( secondary_timestamp, secondary_timestamp_desc, msiecf_item, recovered) parser_context.ProduceEvent( event_object, parser_chain=parser_chain, file_entry=file_entry) expiration_timestamp = msiecf_item.get_expiration_time_as_integer() if expiration_timestamp > 0: # The expiration time in MSIECF version 4.7 is stored as a FILETIME value # in version 5.2 it is stored as a FAT date time value. # Since the as_integer function returns the raw integer value we need to # apply the right conversion here. if self.version == u'4.7': event_object = MsiecfUrlEvent( timelib.Timestamp.FromFiletime(expiration_timestamp), eventdata.EventTimestamp.EXPIRATION_TIME, msiecf_item, recovered) else: event_object = MsiecfUrlEvent( timelib.Timestamp.FromFatDateTime(expiration_timestamp), eventdata.EventTimestamp.EXPIRATION_TIME, msiecf_item, recovered) parser_context.ProduceEvent( event_object, parser_chain=parser_chain, file_entry=file_entry) last_checked_timestamp = msiecf_item.get_last_checked_time_as_integer() if last_checked_timestamp > 0: event_object = MsiecfUrlEvent( timelib.Timestamp.FromFatDateTime(last_checked_timestamp), eventdata.EventTimestamp.LAST_CHECKED_TIME, msiecf_item, recovered) parser_context.ProduceEvent( event_object, parser_chain=parser_chain, file_entry=file_entry) def Parse(self, parser_context, file_entry, parser_chain=None): """Extract data from a MSIE Cache File (MSIECF). Args: parser_context: A parser context object (instance of ParserContext). file_entry: A file entry object (instance of dfvfs.FileEntry). parser_chain: Optional string containing the parsing chain up to this point. The default is None. """ file_object = file_entry.GetFileObject() msiecf_file = pymsiecf.file() msiecf_file.set_ascii_codepage(parser_context.codepage) try: msiecf_file.open_file_object(file_object) self.version = msiecf_file.format_version except IOError as exception: raise errors.UnableToParseFile( u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( self.NAME, file_entry.name, exception)) # Add ourselves to the parser chain, which will be used in all subsequent # event creation in this parser. parser_chain = self._BuildParserChain(parser_chain) for item_index in range(0, msiecf_file.number_of_items): try: msiecf_item = msiecf_file.get_item(item_index) if isinstance(msiecf_item, pymsiecf.url): self._ParseUrl( parser_context, msiecf_item, file_entry=file_entry, parser_chain=parser_chain) # TODO: implement support for pymsiecf.leak, pymsiecf.redirected, # pymsiecf.item. except IOError as exception: logging.warning( u'[{0:s}] unable to parse item: {1:d} in file: {2:s}: {3:s}'.format( self.NAME, item_index, file_entry.name, exception)) for item_index in range(0, msiecf_file.number_of_recovered_items): try: msiecf_item = msiecf_file.get_recovered_item(item_index) if isinstance(msiecf_item, pymsiecf.url): self._ParseUrl( parser_context, msiecf_item, file_entry=file_entry, parser_chain=parser_chain, recovered=True) # TODO: implement support for pymsiecf.leak, pymsiecf.redirected, # pymsiecf.item. except IOError as exception: logging.info(( u'[{0:s}] unable to parse recovered item: {1:d} in file: {2:s}: ' u'{3:s}').format( self.NAME, item_index, file_entry.name, exception)) file_object.close() manager.ParsersManager.RegisterParser(MsiecfParser)