#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parser for Google Chrome and Chromium Cache files."""

import logging
import os

import construct

from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver

from plaso.events import time_events
from plaso.lib import errors
from plaso.lib import eventdata
from plaso.parsers import interface
from plaso.parsers import manager


class CacheAddress(object):
  """Class that contains a cache address."""

  FILE_TYPE_SEPARATE = 0
  FILE_TYPE_BLOCK_RANKINGS = 1
  FILE_TYPE_BLOCK_256 = 2
  FILE_TYPE_BLOCK_1024 = 3
  FILE_TYPE_BLOCK_4096 = 4

  _BLOCK_DATA_FILE_TYPES = [
      FILE_TYPE_BLOCK_RANKINGS,
      FILE_TYPE_BLOCK_256,
      FILE_TYPE_BLOCK_1024,
      FILE_TYPE_BLOCK_4096]

  _FILE_TYPE_BLOCK_SIZES = [0, 36, 256, 1024, 4096]

  def __init__(self, cache_address):
    """Initializes the cache address object.

    Args:
      cache_address: the cache address value.
    """
    super(CacheAddress, self).__init__()
    self.block_number = None
    self.block_offset = None
    self.block_size = None
    self.filename = None
    self.value = cache_address

    # Bit 31 of the cache address indicates whether it is initialized.
    if cache_address & 0x80000000:
      self.is_initialized = u'True'
    else:
      self.is_initialized = u'False'

    # Bits 28-30 of the cache address contain the file type.
    self.file_type = (cache_address & 0x70000000) >> 28
    if cache_address != 0x00000000:
      if self.file_type == self.FILE_TYPE_SEPARATE:
        file_selector = cache_address & 0x0fffffff
        self.filename = u'f_{0:06x}'.format(file_selector)

      elif self.file_type in self._BLOCK_DATA_FILE_TYPES:
        file_selector = (cache_address & 0x00ff0000) >> 16
        self.filename = u'data_{0:d}'.format(file_selector)

        file_block_size = self._FILE_TYPE_BLOCK_SIZES[self.file_type]
        self.block_number = cache_address & 0x0000ffff
        self.block_size = (cache_address & 0x03000000) >> 24
        self.block_size *= file_block_size
        # The data block files contain a header of 8192 bytes.
        self.block_offset = 8192 + (self.block_number * file_block_size)


class CacheEntry(object):
  """Class that contains a cache entry."""

  def __init__(self):
    """Initializes the cache entry object."""
    super(CacheEntry, self).__init__()
    self.creation_time = None
    self.hash = None
    self.key = None
    self.next = None
    self.rankings_node = None


class IndexFile(object):
  """Class that contains an index file."""

  SIGNATURE = 0xc103cac3

  _FILE_HEADER = construct.Struct(
      'chrome_cache_index_file_header',
      construct.ULInt32('signature'),
      construct.ULInt16('minor_version'),
      construct.ULInt16('major_version'),
      construct.ULInt32('number_of_entries'),
      construct.ULInt32('stored_data_size'),
      construct.ULInt32('last_created_file_number'),
      construct.ULInt32('unknown1'),
      construct.ULInt32('unknown2'),
      construct.ULInt32('table_size'),
      construct.ULInt32('unknown3'),
      construct.ULInt32('unknown4'),
      construct.ULInt64('creation_time'),
      construct.Padding(208))

  def __init__(self):
    """Initializes the index file object."""
    super(IndexFile, self).__init__()
    self._file_object = None
    self.creation_time = None
    self.version = None
    self.index_table = []

  def _ReadFileHeader(self):
"""Reads the file header. Raises: IOError: if the file header cannot be read. """ self._file_object.seek(0, os.SEEK_SET) try: file_header = self._FILE_HEADER.parse_stream(self._file_object) except construct.FieldError as exception: raise IOError(u'Unable to parse file header with error: {0:s}'.format( exception)) signature = file_header.get('signature') if signature != self.SIGNATURE: raise IOError(u'Unsupported index file signature') self.version = u'{0:d}.{1:d}'.format( file_header.get('major_version'), file_header.get('minor_version')) if self.version not in [u'2.0', u'2.1']: raise IOError(u'Unsupported index file version: {0:s}'.format( self.version)) self.creation_time = file_header.get('creation_time') def _ReadIndexTable(self): """Reads the index table.""" cache_address_data = self._file_object.read(4) while len(cache_address_data) == 4: value = construct.ULInt32('cache_address').parse(cache_address_data) if value: cache_address = CacheAddress(value) self.index_table.append(cache_address) cache_address_data = self._file_object.read(4) def Close(self): """Closes the index file.""" if self._file_object: self._file_object.close() self._file_object = None def Open(self, file_object): """Opens the index file. Args: file_object: the file object. """ self._file_object = file_object self._ReadFileHeader() # Skip over the LRU data, which is 112 bytes in size. self._file_object.seek(112, os.SEEK_CUR) self._ReadIndexTable() class DataBlockFile(object): """Class that contains a data block file.""" SIGNATURE = 0xc104cac3 _FILE_HEADER = construct.Struct( 'chrome_cache_data_file_header', construct.ULInt32('signature'), construct.ULInt16('minor_version'), construct.ULInt16('major_version'), construct.ULInt16('file_number'), construct.ULInt16('next_file_number'), construct.ULInt32('block_size'), construct.ULInt32('number_of_entries'), construct.ULInt32('maximum_number_of_entries'), construct.Array(4, construct.ULInt32('emtpy')), construct.Array(4, construct.ULInt32('hints')), construct.ULInt32('updating'), construct.Array(5, construct.ULInt32('user'))) _CACHE_ENTRY = construct.Struct( 'chrome_cache_entry', construct.ULInt32('hash'), construct.ULInt32('next_address'), construct.ULInt32('rankings_node_address'), construct.ULInt32('reuse_count'), construct.ULInt32('refetch_count'), construct.ULInt32('state'), construct.ULInt64('creation_time'), construct.ULInt32('key_size'), construct.ULInt32('long_key_address'), construct.Array(4, construct.ULInt32('data_stream_sizes')), construct.Array(4, construct.ULInt32('data_stream_addresses')), construct.ULInt32('flags'), construct.Padding(16), construct.ULInt32('self_hash'), construct.Array(160, construct.UBInt8('key'))) def __init__(self): """Initializes the data block file object.""" super(DataBlockFile, self).__init__() self._file_object = None self.creation_time = None self.block_size = None self.number_of_entries = None self.version = None def _ReadFileHeader(self): """Reads the file header. Raises: IOError: if the file header cannot be read. 
""" self._file_object.seek(0, os.SEEK_SET) try: file_header = self._FILE_HEADER.parse_stream(self._file_object) except construct.FieldError as exception: raise IOError(u'Unable to parse file header with error: {0:s}'.format( exception)) signature = file_header.get('signature') if signature != self.SIGNATURE: raise IOError(u'Unsupported data block file signature') self.version = u'{0:d}.{1:d}'.format( file_header.get('major_version'), file_header.get('minor_version')) if self.version not in [u'2.0', u'2.1']: raise IOError(u'Unsupported data block file version: {0:s}'.format( self.version)) self.block_size = file_header.get('block_size') self.number_of_entries = file_header.get('number_of_entries') def ReadCacheEntry(self, block_offset): """Reads a cache entry.""" self._file_object.seek(block_offset, os.SEEK_SET) try: cache_entry_struct = self._CACHE_ENTRY.parse_stream(self._file_object) except construct.FieldError as exception: raise IOError(u'Unable to parse cache entry with error: {0:s}'.format( exception)) cache_entry = CacheEntry() cache_entry.hash = cache_entry_struct.get('hash') cache_entry.next = CacheAddress(cache_entry_struct.get('next_address')) cache_entry.rankings_node = CacheAddress(cache_entry_struct.get( 'rankings_node_address')) cache_entry.creation_time = cache_entry_struct.get('creation_time') byte_array = cache_entry_struct.get('key') string = u''.join(map(chr, byte_array)) cache_entry.key, _, _ = string.partition(u'\x00') return cache_entry def Close(self): """Closes the data block file.""" if self._file_object: self._file_object.close() self._file_object = None def Open(self, file_object): """Opens the data block file. Args: file_object: the file object. """ self._file_object = file_object self._ReadFileHeader() class ChromeCacheEntryEvent(time_events.WebKitTimeEvent): """Class that contains a Chrome Cache event.""" DATA_TYPE = 'chrome:cache:entry' def __init__(self, cache_entry): """Initializes the event object. Args: cache_entry: the cache entry (instance of CacheEntry). """ super(ChromeCacheEntryEvent, self).__init__( cache_entry.creation_time, eventdata.EventTimestamp.CREATION_TIME) self.original_url = cache_entry.key class ChromeCacheParser(interface.BaseParser): """Parses Chrome Cache files.""" NAME = 'chrome_cache' DESCRIPTION = u'Parser for Chrome Cache files.' def Parse(self, parser_context, file_entry, parser_chain=None): """Extract event objects from Chrome Cache files. Args: parser_context: A parser context object (instance of ParserContext). file_entry: A file entry object (instance of dfvfs.FileEntry). parser_chain: Optional string containing the parsing chain up to this point. The default is None. """ file_object = file_entry.GetFileObject() index_file = IndexFile() try: index_file.Open(file_object) except IOError as exception: file_object.close() raise errors.UnableToParseFile( u'[{0:s}] unable to parse index file {1:s} with error: {2:s}'.format( self.NAME, file_entry.name, exception)) # Build a lookup table for the data block files. file_system = file_entry.GetFileSystem() path_segments = file_system.SplitPath(file_entry.path_spec.location) # Add ourselves to the parser chain, which will be used in all subsequent # event creation in this parser. parser_chain = self._BuildParserChain(parser_chain) data_block_files = {} for cache_address in index_file.index_table: if cache_address.filename not in data_block_files: # Remove the previous filename from the path segments list and # add one of the data block file. 
        path_segments.pop()
        path_segments.append(cache_address.filename)

        # We need to pass only used arguments to the path specification
        # factory otherwise it will raise.
        kwargs = {}
        if file_entry.path_spec.parent:
          kwargs['parent'] = file_entry.path_spec.parent
        kwargs['location'] = file_system.JoinPath(path_segments)

        data_block_file_path_spec = path_spec_factory.Factory.NewPathSpec(
            file_entry.path_spec.TYPE_INDICATOR, **kwargs)

        try:
          data_block_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
              data_block_file_path_spec)
        except RuntimeError as exception:
          logging.error((
              u'[{0:s}] Unable to open data block file: {1:s} while parsing '
              u'{2:s} with error: {3:s}').format(
                  parser_chain, kwargs['location'],
                  file_entry.path_spec.comparable, exception))
          data_block_file_entry = None

        if not data_block_file_entry:
          logging.error(u'Missing data block file: {0:s}'.format(
              cache_address.filename))
          data_block_file = None

        else:
          data_block_file_object = data_block_file_entry.GetFileObject()
          data_block_file = DataBlockFile()

          try:
            data_block_file.Open(data_block_file_object)
          except IOError as exception:
            logging.error((
                u'Unable to open data block file: {0:s} with error: '
                u'{1:s}').format(cache_address.filename, exception))
            data_block_file = None

        data_block_files[cache_address.filename] = data_block_file

    # Parse the cache entries in the data block files.
    for cache_address in index_file.index_table:
      cache_address_chain_length = 0
      while cache_address.value != 0x00000000:
        if cache_address_chain_length >= 64:
          logging.error(u'Maximum allowed cache address chain length reached.')
          break

        data_file = data_block_files.get(cache_address.filename, None)
        if not data_file:
          logging.debug(u'Cache address: 0x{0:08x} missing data file.'.format(
              cache_address.value))
          break

        try:
          cache_entry = data_file.ReadCacheEntry(cache_address.block_offset)
        except (IOError, UnicodeDecodeError) as exception:
          logging.error(
              u'Unable to parse cache entry with error: {0:s}'.format(
                  exception))
          break

        event_object = ChromeCacheEntryEvent(cache_entry)
        parser_context.ProduceEvent(
            event_object, parser_chain=parser_chain, file_entry=file_entry)

        cache_address = cache_entry.next
        cache_address_chain_length += 1

    for data_block_file in data_block_files.itervalues():
      if data_block_file:
        data_block_file.Close()

    index_file.Close()


manager.ParsersManager.RegisterParser(ChromeCacheParser)
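

# A minimal illustrative sketch of how CacheAddress decodes a raw 32-bit
# cache address value, assuming the bit layout implemented above. The example
# value is hypothetical and this block is not part of the parser; it only
# runs when the module is invoked directly.
if __name__ == '__main__':
  # 0xa1010003: bit 31 set (initialized), file type 2 (FILE_TYPE_BLOCK_256),
  # file selector 1, 1 contiguous 256-byte block, block number 3.
  example_address = CacheAddress(0xa1010003)
  print u'Initialized:\t{0:s}'.format(example_address.is_initialized)
  # The file selector yields the data block file 'data_1'.
  print u'Filename:\t{0:s}'.format(example_address.filename)
  # 1 block of 256 bytes.
  print u'Block size:\t{0:d}'.format(example_address.block_size)
  # 8192 byte file header + (block number 3 * 256 byte blocks) = 8960.
  print u'Block offset:\t{0:d}'.format(example_address.block_offset)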