# plaso-rubanetra/plaso/parsers/chrome_cache.py
# Source snapshot metadata: 2020-04-06 18:48:34 +02:00, 442 lines, 14 KiB, Python.
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parser for Google Chrome and Chromium Cache files."""
import logging
import os
import construct
from dfvfs.resolver import resolver as path_spec_resolver
from dfvfs.path import factory as path_spec_factory
from plaso.events import time_events
from plaso.lib import errors
from plaso.lib import eventdata
from plaso.parsers import interface
from plaso.parsers import manager
class CacheAddress(object):
  """A cache address value as stored in Chrome Cache files.

  Attributes:
    block_number: the block number within a block data file, or None.
    block_offset: the byte offset of the block within the data file, or None.
    block_size: the size of the block in bytes, or None.
    filename: the name of the file that contains the cached data, or None.
    value: the original 32-bit cache address value.
    is_initialized: u'True' if the initialized bit is set, u'False' otherwise.
    file_type: the file type encoded in the address.
  """
  FILE_TYPE_SEPARATE = 0
  FILE_TYPE_BLOCK_RANKINGS = 1
  FILE_TYPE_BLOCK_256 = 2
  FILE_TYPE_BLOCK_1024 = 3
  FILE_TYPE_BLOCK_4096 = 4

  _BLOCK_DATA_FILE_TYPES = [
      FILE_TYPE_BLOCK_RANKINGS,
      FILE_TYPE_BLOCK_256,
      FILE_TYPE_BLOCK_1024,
      FILE_TYPE_BLOCK_4096]

  # Block sizes indexed by file type; index 0 (separate file) has no blocks.
  _FILE_TYPE_BLOCK_SIZES = [0, 36, 256, 1024, 4096]

  def __init__(self, cache_address):
    """Initializes the cache address object.

    Args:
      cache_address: the 32-bit cache address value.
    """
    super(CacheAddress, self).__init__()
    self.block_number = None
    self.block_offset = None
    self.block_size = None
    self.filename = None
    self.value = cache_address

    # The most significant bit flags whether the address is initialized.
    self.is_initialized = u'True' if cache_address & 0x80000000 else u'False'

    # Bits 28-30 encode the type of file the address refers to.
    self.file_type = (cache_address & 0x70000000) >> 28

    if cache_address != 0x00000000:
      if self.file_type == self.FILE_TYPE_SEPARATE:
        # Data lives in its own f_XXXXXX file; the low 28 bits select it.
        self.filename = u'f_{0:06x}'.format(cache_address & 0x0fffffff)

      elif self.file_type in self._BLOCK_DATA_FILE_TYPES:
        # Data lives inside a data_N block file.
        self.filename = u'data_{0:d}'.format(
            (cache_address & 0x00ff0000) >> 16)

        file_block_size = self._FILE_TYPE_BLOCK_SIZES[self.file_type]
        self.block_number = cache_address & 0x0000ffff
        # Bits 24-25 encode the number of contiguous blocks occupied.
        self.block_size = (
            ((cache_address & 0x03000000) >> 24) * file_block_size)
        # Block data starts after the 8192-byte file header.
        self.block_offset = 8192 + (self.block_number * file_block_size)
class CacheEntry(object):
  """A single Chrome Cache entry.

  Attributes:
    creation_time: the creation time of the entry, or None.
    hash: the hash value of the entry, or None.
    key: the key string of the entry, or None.
    next: cache address (instance of CacheAddress) of the next entry in
          the chain, or None.
    rankings_node: cache address (instance of CacheAddress) of the
                   corresponding rankings node, or None.
  """

  def __init__(self):
    """Initializes the cache entry object with all fields unset."""
    super(CacheEntry, self).__init__()
    self.hash = None
    self.next = None
    self.rankings_node = None
    self.creation_time = None
    self.key = None
class IndexFile(object):
  """A Chrome Cache index file.

  Attributes:
    creation_time: the creation time read from the file header, or None.
    version: the format version string (e.g. u'2.0'), or None.
    index_table: list of non-zero cache addresses (instances of
                 CacheAddress) read from the index table.
  """

  # Expected signature value of the index file header.
  SIGNATURE = 0xc103cac3

  _FILE_HEADER = construct.Struct(
      'chrome_cache_index_file_header',
      construct.ULInt32('signature'),
      construct.ULInt16('minor_version'),
      construct.ULInt16('major_version'),
      construct.ULInt32('number_of_entries'),
      construct.ULInt32('stored_data_size'),
      construct.ULInt32('last_created_file_number'),
      construct.ULInt32('unknown1'),
      construct.ULInt32('unknown2'),
      construct.ULInt32('table_size'),
      construct.ULInt32('unknown3'),
      construct.ULInt32('unknown4'),
      construct.ULInt64('creation_time'),
      construct.Padding(208))

  # Loop-invariant parser for a single 32-bit cache address value; hoisted
  # to class level so _ReadIndexTable does not build a new construct object
  # for every 4-byte read.
  _CACHE_ADDRESS = construct.ULInt32('cache_address')

  def __init__(self):
    """Initializes the index file object."""
    super(IndexFile, self).__init__()
    self._file_object = None
    self.creation_time = None
    self.version = None
    self.index_table = []

  def _ReadFileHeader(self):
    """Reads the file header.

    Raises:
      IOError: if the file header cannot be read, the signature does not
               match or the version is not supported.
    """
    self._file_object.seek(0, os.SEEK_SET)

    try:
      file_header = self._FILE_HEADER.parse_stream(self._file_object)
    except construct.FieldError as exception:
      raise IOError(u'Unable to parse file header with error: {0:s}'.format(
          exception))

    signature = file_header.get('signature')
    if signature != self.SIGNATURE:
      raise IOError(u'Unsupported index file signature')

    self.version = u'{0:d}.{1:d}'.format(
        file_header.get('major_version'),
        file_header.get('minor_version'))
    if self.version not in [u'2.0', u'2.1']:
      raise IOError(u'Unsupported index file version: {0:s}'.format(
          self.version))

    self.creation_time = file_header.get('creation_time')

  def _ReadIndexTable(self):
    """Reads the index table, collecting every non-zero cache address."""
    cache_address_data = self._file_object.read(4)

    while len(cache_address_data) == 4:
      value = self._CACHE_ADDRESS.parse(cache_address_data)
      if value:
        cache_address = CacheAddress(value)
        self.index_table.append(cache_address)
      cache_address_data = self._file_object.read(4)

  def Close(self):
    """Closes the index file."""
    if self._file_object:
      self._file_object.close()
      self._file_object = None

  def Open(self, file_object):
    """Opens the index file and reads its header and index table.

    Args:
      file_object: the file object.

    Raises:
      IOError: if the file header cannot be read or is not supported.
    """
    self._file_object = file_object
    self._ReadFileHeader()
    # Skip over the LRU data, which is 112 bytes in size.
    self._file_object.seek(112, os.SEEK_CUR)
    self._ReadIndexTable()
class DataBlockFile(object):
  """A Chrome Cache data block file (data_0, data_1, ...).

  Attributes:
    creation_time: the creation time, or None.
    block_size: the block size read from the file header, or None.
    number_of_entries: the number of entries read from the file header,
                       or None.
    version: the format version string (e.g. u'2.0'), or None.
  """

  # Expected signature value of the data block file header.
  SIGNATURE = 0xc104cac3

  _FILE_HEADER = construct.Struct(
      'chrome_cache_data_file_header',
      construct.ULInt32('signature'),
      construct.ULInt16('minor_version'),
      construct.ULInt16('major_version'),
      construct.ULInt16('file_number'),
      construct.ULInt16('next_file_number'),
      construct.ULInt32('block_size'),
      construct.ULInt32('number_of_entries'),
      construct.ULInt32('maximum_number_of_entries'),
      # NOTE(review): 'emtpy' is a typo for 'empty'; preserved because the
      # field name is never accessed by this parser.
      construct.Array(4, construct.ULInt32('emtpy')),
      construct.Array(4, construct.ULInt32('hints')),
      construct.ULInt32('updating'),
      construct.Array(5, construct.ULInt32('user')))

  _CACHE_ENTRY = construct.Struct(
      'chrome_cache_entry',
      construct.ULInt32('hash'),
      construct.ULInt32('next_address'),
      construct.ULInt32('rankings_node_address'),
      construct.ULInt32('reuse_count'),
      construct.ULInt32('refetch_count'),
      construct.ULInt32('state'),
      construct.ULInt64('creation_time'),
      construct.ULInt32('key_size'),
      construct.ULInt32('long_key_address'),
      construct.Array(4, construct.ULInt32('data_stream_sizes')),
      construct.Array(4, construct.ULInt32('data_stream_addresses')),
      construct.ULInt32('flags'),
      construct.Padding(16),
      construct.ULInt32('self_hash'),
      construct.Array(160, construct.UBInt8('key')))

  def __init__(self):
    """Initializes the data block file object."""
    super(DataBlockFile, self).__init__()
    self._file_object = None
    self.creation_time = None
    self.block_size = None
    self.number_of_entries = None
    self.version = None

  def _ReadFileHeader(self):
    """Reads the file header.

    Raises:
      IOError: if the file header cannot be read, the signature does not
               match or the version is not supported.
    """
    self._file_object.seek(0, os.SEEK_SET)

    try:
      file_header = self._FILE_HEADER.parse_stream(self._file_object)
    except construct.FieldError as exception:
      raise IOError(u'Unable to parse file header with error: {0:s}'.format(
          exception))

    signature = file_header.get('signature')
    if signature != self.SIGNATURE:
      raise IOError(u'Unsupported data block file signature')

    self.version = u'{0:d}.{1:d}'.format(
        file_header.get('major_version'),
        file_header.get('minor_version'))
    if self.version not in [u'2.0', u'2.1']:
      raise IOError(u'Unsupported data block file version: {0:s}'.format(
          self.version))

    self.block_size = file_header.get('block_size')
    self.number_of_entries = file_header.get('number_of_entries')

  def ReadCacheEntry(self, block_offset):
    """Reads a cache entry at the given offset.

    Args:
      block_offset: the byte offset of the block that contains the entry.

    Returns:
      A cache entry (instance of CacheEntry).

    Raises:
      IOError: if the cache entry cannot be read.
    """
    self._file_object.seek(block_offset, os.SEEK_SET)

    try:
      cache_entry_struct = self._CACHE_ENTRY.parse_stream(self._file_object)
    except construct.FieldError as exception:
      raise IOError(u'Unable to parse cache entry with error: {0:s}'.format(
          exception))

    cache_entry = CacheEntry()
    cache_entry.hash = cache_entry_struct.get('hash')

    cache_entry.next = CacheAddress(cache_entry_struct.get('next_address'))
    cache_entry.rankings_node = CacheAddress(cache_entry_struct.get(
        'rankings_node_address'))

    cache_entry.creation_time = cache_entry_struct.get('creation_time')

    # The key is stored as a fixed-size 160-byte array; convert it to a
    # string and keep only the part before the first NUL byte.
    byte_array = cache_entry_struct.get('key')
    string = u''.join(map(chr, byte_array))
    cache_entry.key, _, _ = string.partition(u'\x00')

    return cache_entry

  def Close(self):
    """Closes the data block file."""
    if self._file_object:
      self._file_object.close()
      self._file_object = None

  def Open(self, file_object):
    """Opens the data block file and reads its header.

    Args:
      file_object: the file object.
    """
    self._file_object = file_object
    self._ReadFileHeader()
class ChromeCacheEntryEvent(time_events.WebKitTimeEvent):
  """Convenience class for a Chrome Cache entry event.

  Attributes:
    original_url: the original URL, taken from the cache entry key.
  """
  DATA_TYPE = 'chrome:cache:entry'

  def __init__(self, cache_entry):
    """Initializes the event object.

    Args:
      cache_entry: the cache entry (instance of CacheEntry).
    """
    # The cache entry creation time is a WebKit timestamp; it is mapped to
    # the creation time event timestamp.
    super(ChromeCacheEntryEvent, self).__init__(
        cache_entry.creation_time, eventdata.EventTimestamp.CREATION_TIME)
    self.original_url = cache_entry.key
class ChromeCacheParser(interface.BaseParser):
  """Parses Chrome Cache files."""

  NAME = 'chrome_cache'
  DESCRIPTION = u'Parser for Chrome Cache files.'

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extracts event objects from Chrome Cache files.

    The file entry is expected to be the cache index file; the data block
    files (data_N, f_XXXXXX) referenced by the index are resolved relative
    to the index file's path specification.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: when the file cannot be parsed as an index file.
    """
    file_object = file_entry.GetFileObject()
    index_file = IndexFile()
    try:
      index_file.Open(file_object)
    except IOError as exception:
      file_object.close()
      raise errors.UnableToParseFile(
          u'[{0:s}] unable to parse index file {1:s} with error: {2:s}'.format(
              self.NAME, file_entry.name, exception))

    # Build a lookup table for the data block files.
    file_system = file_entry.GetFileSystem()
    path_segments = file_system.SplitPath(file_entry.path_spec.location)

    # Add ourselves to the parser chain, which will be used in all subsequent
    # event creation in this parser.
    parser_chain = self._BuildParserChain(parser_chain)

    data_block_files = {}
    for cache_address in index_file.index_table:
      if cache_address.filename not in data_block_files:
        # Remove the previous filename from the path segments list and
        # add one of the data block file.
        path_segments.pop()
        path_segments.append(cache_address.filename)

        # We need to pass only used arguments to the path specification
        # factory otherwise it will raise.
        kwargs = {}
        if file_entry.path_spec.parent:
          kwargs['parent'] = file_entry.path_spec.parent
        kwargs['location'] = file_system.JoinPath(path_segments)

        data_block_file_path_spec = path_spec_factory.Factory.NewPathSpec(
            file_entry.path_spec.TYPE_INDICATOR, **kwargs)
        try:
          data_block_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
              data_block_file_path_spec)
        except RuntimeError as exception:
          logging.error((
              u'[{0:s}] Unable to open data block file: {1:s} while parsing '
              u'{2:s} with error: {3:s}').format(
                  parser_chain, kwargs['location'],
                  file_entry.path_spec.comparable, exception))
          data_block_file_entry = None

        if not data_block_file_entry:
          logging.error(u'Missing data block file: {0:s}'.format(
              cache_address.filename))
          data_block_file = None
        else:
          data_block_file_object = data_block_file_entry.GetFileObject()
          data_block_file = DataBlockFile()
          try:
            data_block_file.Open(data_block_file_object)
          except IOError as exception:
            logging.error((
                u'Unable to open data block file: {0:s} with error: '
                u'{1:s}').format(cache_address.filename, exception))
            # DataBlockFile.Close() is never invoked for a file that failed
            # to open, so close the file object here to avoid leaking it.
            data_block_file_object.close()
            data_block_file = None

        data_block_files[cache_address.filename] = data_block_file

    # Parse the cache entries in the data block files, following the chain
    # of next addresses starting from each index table entry.
    for cache_address in index_file.index_table:
      cache_address_chain_length = 0
      while cache_address.value != 0x00000000:
        # Guard against cycles or corrupted chains.
        if cache_address_chain_length >= 64:
          logging.error(u'Maximum allowed cache address chain length reached.')
          break

        data_file = data_block_files.get(cache_address.filename, None)
        if not data_file:
          logging.debug(u'Cache address: 0x{0:08x} missing data file.'.format(
              cache_address.value))
          break

        try:
          cache_entry = data_file.ReadCacheEntry(cache_address.block_offset)
        except (IOError, UnicodeDecodeError) as exception:
          logging.error(
              u'Unable to parse cache entry with error: {0:s}'.format(
                  exception))
          break

        event_object = ChromeCacheEntryEvent(cache_entry)
        parser_context.ProduceEvent(
            event_object, parser_chain=parser_chain, file_entry=file_entry)

        cache_address = cache_entry.next
        cache_address_chain_length += 1

    for data_block_file in data_block_files.itervalues():
      if data_block_file:
        data_block_file.Close()

    # Closing the index file also closes the underlying file object.
    index_file.Close()
manager.ParsersManager.RegisterParser(ChromeCacheParser)