442 lines
14 KiB
Python
442 lines
14 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2014 The Plaso Project Authors.
|
|
# Please see the AUTHORS file for details on individual authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Parser for Google Chrome and Chromium Cache files."""
|
|
|
|
import logging
|
|
import os
|
|
|
|
import construct
|
|
|
|
from dfvfs.resolver import resolver as path_spec_resolver
|
|
from dfvfs.path import factory as path_spec_factory
|
|
|
|
from plaso.events import time_events
|
|
from plaso.lib import errors
|
|
from plaso.lib import eventdata
|
|
from plaso.parsers import interface
|
|
from plaso.parsers import manager
|
|
|
|
|
|
class CacheAddress(object):
  """Decoded form of a 32-bit Chrome Cache address value.

  Attributes:
    value: the original (undecoded) cache address value.
    is_initialized: the string u'True' if bit 31 of the value is set,
                    u'False' otherwise.
    file_type: the 3-bit file type stored in bits 28-30 of the value.
    filename: name of the file the address points into, or None if the
              address is zero or the file type is not one handled here.
    block_number: block index within a data block file, or None if the
                  address does not refer to a data block file.
    block_size: number-of-blocks field (bits 24-25) multiplied by the block
                size of the file type, or None if not a block address.
    block_offset: byte offset of the block within the data block file, or
                  None if not a block address.
  """

  FILE_TYPE_SEPARATE = 0
  FILE_TYPE_BLOCK_RANKINGS = 1
  FILE_TYPE_BLOCK_256 = 2
  FILE_TYPE_BLOCK_1024 = 3
  FILE_TYPE_BLOCK_4096 = 4

  _BLOCK_DATA_FILE_TYPES = [
      FILE_TYPE_BLOCK_RANKINGS,
      FILE_TYPE_BLOCK_256,
      FILE_TYPE_BLOCK_1024,
      FILE_TYPE_BLOCK_4096]

  # Block size in bytes, indexed by file type.
  _FILE_TYPE_BLOCK_SIZES = [0, 36, 256, 1024, 4096]

  def __init__(self, cache_address):
    """Decodes a cache address value into its components.

    Args:
      cache_address: the cache address value.
    """
    super(CacheAddress, self).__init__()
    self.value = cache_address
    self.filename = None
    self.block_number = None
    self.block_size = None
    self.block_offset = None

    self.is_initialized = u'True' if cache_address & 0x80000000 else u'False'
    self.file_type = (cache_address >> 28) & 0x7

    # An all-zero address does not point anywhere; leave the rest unset.
    if cache_address == 0x00000000:
      return

    if self.file_type == self.FILE_TYPE_SEPARATE:
      # The lower 28 bits select a separate f_###### file.
      self.filename = u'f_{0:06x}'.format(cache_address & 0x0fffffff)
      return

    if self.file_type not in self._BLOCK_DATA_FILE_TYPES:
      return

    unit_size = self._FILE_TYPE_BLOCK_SIZES[self.file_type]

    # Bits 16-23 select the data_# file, bits 0-15 the block within it.
    self.filename = u'data_{0:d}'.format((cache_address >> 16) & 0xff)
    self.block_number = cache_address & 0x0000ffff
    self.block_size = ((cache_address >> 24) & 0x3) * unit_size
    # The first 8192 bytes of the file are skipped; presumably this is the
    # data block file header - confirm against the format specification.
    self.block_offset = 8192 + self.block_number * unit_size
|
|
|
|
|
|
class CacheEntry(object):
  """Plain container for the values read from a single cache entry.

  Attributes:
    hash: the hash value of the cache entry.
    key: the key of the cache entry.
    creation_time: the creation time value of the cache entry.
    next: the cache address of the next cache entry.
    rankings_node: the cache address of the rankings node.
  """

  def __init__(self):
    """Creates an empty cache entry; every attribute starts out as None."""
    super(CacheEntry, self).__init__()
    self.hash = None
    self.key = None
    self.creation_time = None
    # Cache addresses linking this entry to other records.
    self.next = None
    self.rankings_node = None
|
|
|
|
|
|
class IndexFile(object):
  """Class that contains an index file.

  Attributes:
    creation_time: the creation time value read from the file header, or
                   None if no file has been opened.
    version: the format version as a "major.minor" string, or None if no
             file has been opened.
    index_table: list of the non-zero index table entries (instances of
                 CacheAddress).
  """

  SIGNATURE = 0xc103cac3

  # The file header is 256 bytes in size, including the trailing padding.
  _FILE_HEADER = construct.Struct(
      'chrome_cache_index_file_header',
      construct.ULInt32('signature'),
      construct.ULInt16('minor_version'),
      construct.ULInt16('major_version'),
      construct.ULInt32('number_of_entries'),
      construct.ULInt32('stored_data_size'),
      construct.ULInt32('last_created_file_number'),
      construct.ULInt32('unknown1'),
      construct.ULInt32('unknown2'),
      construct.ULInt32('table_size'),
      construct.ULInt32('unknown3'),
      construct.ULInt32('unknown4'),
      construct.ULInt64('creation_time'),
      construct.Padding(208))

  # Parser for a single index table entry; built once instead of once per
  # entry read.
  _CACHE_ADDRESS = construct.ULInt32('cache_address')

  def __init__(self):
    """Initializes the index file object."""
    super(IndexFile, self).__init__()
    self._file_object = None
    self.creation_time = None
    self.version = None
    self.index_table = []

  def _ReadFileHeader(self):
    """Reads the file header.

    Sets the version and creation_time attributes.

    Raises:
      IOError: if the file header cannot be read, or if the signature or
               the format version is not supported.
    """
    self._file_object.seek(0, os.SEEK_SET)

    try:
      file_header = self._FILE_HEADER.parse_stream(self._file_object)
    except construct.FieldError as exception:
      # Use !s: formatting an exception object with :s is not supported by
      # object.__format__ on newer Python versions.
      raise IOError(u'Unable to parse file header with error: {0!s}'.format(
          exception))

    signature = file_header.get('signature')

    if signature != self.SIGNATURE:
      raise IOError(u'Unsupported index file signature')

    self.version = u'{0:d}.{1:d}'.format(
        file_header.get('major_version'),
        file_header.get('minor_version'))

    if self.version not in [u'2.0', u'2.1']:
      raise IOError(u'Unsupported index file version: {0:s}'.format(
          self.version))

    self.creation_time = file_header.get('creation_time')

  def _ReadIndexTable(self):
    """Reads the index table.

    Reads 32-bit values from the current file position until fewer than
    4 bytes remain and appends every non-zero value to index_table as
    a CacheAddress.
    """
    cache_address_data = self._file_object.read(4)

    while len(cache_address_data) == 4:
      value = self._CACHE_ADDRESS.parse(cache_address_data)

      # Zero denotes an unused index table entry.
      if value:
        self.index_table.append(CacheAddress(value))

      cache_address_data = self._file_object.read(4)

  def Close(self):
    """Closes the index file and the underlying file object."""
    if self._file_object:
      self._file_object.close()
      self._file_object = None

  def Open(self, file_object):
    """Opens the index file.

    Args:
      file_object: the file object.

    Raises:
      IOError: if the file header cannot be read or is not supported.
    """
    self._file_object = file_object
    self._ReadFileHeader()
    # Skip over the LRU data, which is 112 bytes in size.
    self._file_object.seek(112, os.SEEK_CUR)
    self._ReadIndexTable()
|
|
|
|
|
|
class DataBlockFile(object):
  """Class that contains a data block file.

  Attributes:
    creation_time: initialized to None; not set by this class.
    block_size: the block size value read from the file header, or None if
                no file has been opened.
    number_of_entries: the number of entries value read from the file
                       header, or None if no file has been opened.
    version: the format version as a "major.minor" string, or None if no
             file has been opened.
  """

  SIGNATURE = 0xc104cac3

  _FILE_HEADER = construct.Struct(
      'chrome_cache_data_file_header',
      construct.ULInt32('signature'),
      construct.ULInt16('minor_version'),
      construct.ULInt16('major_version'),
      construct.ULInt16('file_number'),
      construct.ULInt16('next_file_number'),
      construct.ULInt32('block_size'),
      construct.ULInt32('number_of_entries'),
      construct.ULInt32('maximum_number_of_entries'),
      construct.Array(4, construct.ULInt32('empty')),
      construct.Array(4, construct.ULInt32('hints')),
      construct.ULInt32('updating'),
      construct.Array(5, construct.ULInt32('user')))

  # A cache entry is 256 bytes in size, the last 160 of which hold the key.
  _CACHE_ENTRY = construct.Struct(
      'chrome_cache_entry',
      construct.ULInt32('hash'),
      construct.ULInt32('next_address'),
      construct.ULInt32('rankings_node_address'),
      construct.ULInt32('reuse_count'),
      construct.ULInt32('refetch_count'),
      construct.ULInt32('state'),
      construct.ULInt64('creation_time'),
      construct.ULInt32('key_size'),
      construct.ULInt32('long_key_address'),
      construct.Array(4, construct.ULInt32('data_stream_sizes')),
      construct.Array(4, construct.ULInt32('data_stream_addresses')),
      construct.ULInt32('flags'),
      construct.Padding(16),
      construct.ULInt32('self_hash'),
      construct.Array(160, construct.UBInt8('key')))

  def __init__(self):
    """Initializes the data block file object."""
    super(DataBlockFile, self).__init__()
    self._file_object = None
    self.creation_time = None
    self.block_size = None
    self.number_of_entries = None
    self.version = None

  def _ReadFileHeader(self):
    """Reads the file header.

    Sets the version, block_size and number_of_entries attributes.

    Raises:
      IOError: if the file header cannot be read, or if the signature or
               the format version is not supported.
    """
    self._file_object.seek(0, os.SEEK_SET)

    try:
      file_header = self._FILE_HEADER.parse_stream(self._file_object)
    except construct.FieldError as exception:
      # Use !s: formatting an exception object with :s is not supported by
      # object.__format__ on newer Python versions.
      raise IOError(u'Unable to parse file header with error: {0!s}'.format(
          exception))

    signature = file_header.get('signature')

    if signature != self.SIGNATURE:
      raise IOError(u'Unsupported data block file signature')

    self.version = u'{0:d}.{1:d}'.format(
        file_header.get('major_version'),
        file_header.get('minor_version'))

    if self.version not in [u'2.0', u'2.1']:
      raise IOError(u'Unsupported data block file version: {0:s}'.format(
          self.version))

    self.block_size = file_header.get('block_size')
    self.number_of_entries = file_header.get('number_of_entries')

  def ReadCacheEntry(self, block_offset):
    """Reads a cache entry.

    Args:
      block_offset: the offset of the cache entry, relative to the start of
                    the file.

    Returns:
      The cache entry (instance of CacheEntry).

    Raises:
      IOError: if the cache entry cannot be read.
      UnicodeDecodeError: if the key contains non-ASCII bytes.
    """
    self._file_object.seek(block_offset, os.SEEK_SET)

    try:
      cache_entry_struct = self._CACHE_ENTRY.parse_stream(self._file_object)
    except construct.FieldError as exception:
      raise IOError(u'Unable to parse cache entry with error: {0!s}'.format(
          exception))

    cache_entry = CacheEntry()

    cache_entry.hash = cache_entry_struct.get('hash')

    cache_entry.next = CacheAddress(cache_entry_struct.get('next_address'))
    cache_entry.rankings_node = CacheAddress(cache_entry_struct.get(
        'rankings_node_address'))

    cache_entry.creation_time = cache_entry_struct.get('creation_time')

    # Cut the key at its NUL terminator before decoding it, so that
    # leftover bytes after the terminator cannot make a valid key fail
    # to decode.
    key_data = bytes(bytearray(cache_entry_struct.get('key')))
    key_data, _, _ = key_data.partition(b'\x00')
    cache_entry.key = key_data.decode('ascii')

    return cache_entry

  def Close(self):
    """Closes the data block file and the underlying file object."""
    if self._file_object:
      self._file_object.close()
      self._file_object = None

  def Open(self, file_object):
    """Opens the data block file.

    Args:
      file_object: the file object.

    Raises:
      IOError: if the file header cannot be read or is not supported.
    """
    self._file_object = file_object
    self._ReadFileHeader()
|
|
|
|
|
|
class ChromeCacheEntryEvent(time_events.WebKitTimeEvent):
  """Event object for a single Chrome Cache entry.

  Attributes:
    original_url: the key of the cache entry.
  """

  DATA_TYPE = 'chrome:cache:entry'

  def __init__(self, cache_entry):
    """Builds the event from a cache entry.

    The creation time of the cache entry is handed to the WebKitTimeEvent
    base class together with the creation time timestamp description.

    Args:
      cache_entry: the cache entry (instance of CacheEntry).
    """
    timestamp = cache_entry.creation_time
    timestamp_description = eventdata.EventTimestamp.CREATION_TIME
    super(ChromeCacheEntryEvent, self).__init__(
        timestamp, timestamp_description)

    self.original_url = cache_entry.key
|
|
|
|
|
|
class ChromeCacheParser(interface.BaseParser):
  """Parses Chrome Cache files."""

  NAME = 'chrome_cache'
  DESCRIPTION = u'Parser for Chrome Cache files.'

  def _OpenDataBlockFile(self, file_entry, location, filename, parser_chain):
    """Opens a data block file referenced by the index file.

    Failures are logged and reported by returning None.

    Args:
      file_entry: the file entry of the index file (instance of
                  dfvfs.FileEntry), used as template for the path
                  specification of the data block file.
      location: the location of the data block file.
      filename: the name of the data block file, used in log messages.
      parser_chain: string containing the parsing chain, used in log
                    messages.

    Returns:
      The opened data block file (instance of DataBlockFile) or None if the
      file is missing or cannot be read as a data block file.
    """
    # We need to pass only used arguments to the path specification
    # factory otherwise it will raise.
    kwargs = {}
    if file_entry.path_spec.parent:
      kwargs['parent'] = file_entry.path_spec.parent
    kwargs['location'] = location

    data_block_file_path_spec = path_spec_factory.Factory.NewPathSpec(
        file_entry.path_spec.TYPE_INDICATOR, **kwargs)

    try:
      data_block_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
          data_block_file_path_spec)
    except RuntimeError as exception:
      logging.error((
          u'[{0:s}] Unable to open data block file: {1:s} while parsing '
          u'{2:s} with error: {3!s}').format(
              parser_chain, location,
              file_entry.path_spec.comparable, exception))
      data_block_file_entry = None

    if not data_block_file_entry:
      logging.error(u'Missing data block file: {0:s}'.format(filename))
      return None

    data_block_file = DataBlockFile()
    try:
      data_block_file.Open(data_block_file_entry.GetFileObject())
    except IOError as exception:
      logging.error((
          u'Unable to open data block file: {0:s} with error: '
          u'{1!s}').format(filename, exception))
      # Open() has already taken over the file object; close it so that
      # a rejected file does not stay open.
      data_block_file.Close()
      return None

    return data_block_file

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extract event objects from Chrome Cache files.

    The file entry is read as the index file; the files its index table
    refers to are opened from the same directory and every cache entry
    reachable from the index table is produced as an event.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: when the index file cannot be parsed.
    """
    file_object = file_entry.GetFileObject()
    index_file = IndexFile()
    try:
      index_file.Open(file_object)
    except IOError as exception:
      file_object.close()
      raise errors.UnableToParseFile(
          u'[{0:s}] unable to parse index file {1:s} with error: {2!s}'.format(
              self.NAME, file_entry.name, exception))

    data_block_files = {}
    try:
      # Build a lookup table for the data block files.
      file_system = file_entry.GetFileSystem()
      path_segments = file_system.SplitPath(file_entry.path_spec.location)
      # The data block files live next to the index file: keep the
      # directory part of the path and swap in the file name.
      directory_segments = path_segments[:-1]

      # Add ourselves to the parser chain, which will be used in all
      # subsequent event creation in this parser.
      parser_chain = self._BuildParserChain(parser_chain)

      for cache_address in index_file.index_table:
        filename = cache_address.filename
        if filename in data_block_files:
          continue

        location = file_system.JoinPath(directory_segments + [filename])
        # A None value marks a file that is missing or unreadable so it
        # is not retried for every address that refers to it.
        data_block_files[filename] = self._OpenDataBlockFile(
            file_entry, location, filename, parser_chain)

      # Parse the cache entries in the data block files.
      for cache_address in index_file.index_table:
        cache_address_chain_length = 0
        # Follow the chain of next addresses until a zero address.
        while cache_address.value != 0x00000000:
          # Bound the chain length so that a loop in corrupt data cannot
          # keep the parser busy forever.
          if cache_address_chain_length >= 64:
            logging.error(
                u'Maximum allowed cache address chain length reached.')
            break

          data_file = data_block_files.get(cache_address.filename, None)
          if not data_file:
            logging.debug(
                u'Cache address: 0x{0:08x} missing data file.'.format(
                    cache_address.value))
            break

          try:
            cache_entry = data_file.ReadCacheEntry(cache_address.block_offset)
          except (IOError, UnicodeDecodeError) as exception:
            logging.error(
                u'Unable to parse cache entry with error: {0!s}'.format(
                    exception))
            break

          event_object = ChromeCacheEntryEvent(cache_entry)
          parser_context.ProduceEvent(
              event_object, parser_chain=parser_chain, file_entry=file_entry)

          cache_address = cache_entry.next
          cache_address_chain_length += 1

    finally:
      # Always release the opened files, also when parsing is aborted by
      # an exception.
      for data_block_file in data_block_files.values():
        if data_block_file:
          data_block_file.Close()

      index_file.Close()
|
|
|
|
|
|
# Register the Chrome Cache parser with the parsers manager when this module
# is imported.
manager.ParsersManager.RegisterParser(ChromeCacheParser)
|