plaso-rubanetra/plaso/parsers/firefox_cache.py
2020-04-06 18:48:34 +02:00

247 lines
8.4 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements a parser for Firefox cache files."""
import collections
import logging
import os
import construct
import pyparsing
from plaso.events import time_events
from plaso.lib import errors
from plaso.lib import eventdata
from plaso.parsers import interface
from plaso.parsers import manager
__author__ = 'Petter Bjelland (petter.bjelland@gmail.com)'
class FirefoxCacheEvent(time_events.PosixTimeEvent):
  """Event object describing one record found in a Firefox cache file."""

  DATA_TYPE = 'firefox:cache:record'

  def __init__(self, metadata, request_method, url, response_code):
    """Initializes the event from a parsed cache record.

    The event timestamp is the record's last fetched time, passed to the
    parent class together with the ADDED_TIME timestamp description.

    Args:
      metadata: object exposing the cache record header values as attributes
                (last_modified, major, minor, location, last_fetched,
                expire_time, fetch_count, request_size, info_size and
                data_size).
      request_method: the HTTP request method stored with the record.
      url: the request URL stored with the record.
      response_code: the HTTP response status line stored with the record.
    """
    super(FirefoxCacheEvent, self).__init__(
        metadata.last_fetched, eventdata.EventTimestamp.ADDED_TIME)

    # Mirror the record header values onto the event, one attribute each.
    header_attribute_names = (
        'last_modified', 'major', 'minor', 'location', 'last_fetched',
        'expire_time', 'fetch_count', 'request_size', 'info_size',
        'data_size')
    for attribute_name in header_attribute_names:
      setattr(self, attribute_name, getattr(metadata, attribute_name))

    # Values extracted from the request and response data of the record.
    self.request_method = request_method
    self.url = url
    self.response_code = response_code
class FirefoxCacheParser(interface.BaseParser):
  """Extract cached records from Firefox.

  The parser walks a cache file block by block. Every block boundary is
  treated as a candidate record: a fixed size header is read and validated,
  and for valid records the request (URL) and the response information that
  follow the header are extracted and turned into a FirefoxCacheEvent.
  """

  NAME = 'firefox_cache'
  DESCRIPTION = u'Parser for Firefox Cache files.'

  # Number of bytes allocated to a cache record metadata.
  RECORD_HEADER_SIZE = 36

  # Initial size of Firefox >= 4 cache files.
  INITIAL_CACHE_FILE_SIZE = 1024 * 1024 * 4

  # Smallest possible block size in Firefox cache files.
  MIN_BLOCK_SIZE = 256

  # Record header layout: two 16-bit and eight 32-bit unsigned big-endian
  # integers, RECORD_HEADER_SIZE (36) bytes in total.
  RECORD_HEADER_STRUCT = construct.Struct(
      'record_header',
      construct.UBInt16('major'),
      construct.UBInt16('minor'),
      construct.UBInt32('location'),
      construct.UBInt32('fetch_count'),
      construct.UBInt32('last_fetched'),
      construct.UBInt32('last_modified'),
      construct.UBInt32('expire_time'),
      construct.UBInt32('data_size'),
      construct.UBInt32('request_size'),
      construct.UBInt32('info_size'))

  # Grammar for the alternative cache file name, e.g. '01ABCm02'.
  ALTERNATIVE_CACHE_NAME = (
      pyparsing.Word(pyparsing.hexnums, exact=5) + pyparsing.Word('m', exact=1)
      + pyparsing.Word(pyparsing.nums, exact=2))

  # Result of the cache file detection: the block size to scan with and the
  # offset of the first valid record.
  FIREFOX_CACHE_CONFIG = collections.namedtuple(
      u'firefox_cache_config',
      u'block_size first_record_offset')

  # HTTP request methods that are accepted without a debug message.
  REQUEST_METHODS = [
      u'GET', 'HEAD', 'POST', 'PUT', 'DELETE',
      u'TRACE', 'OPTIONS', 'CONNECT', 'PATCH']

  def _GetFirefoxConfig(self, file_entry):
    """Determines the block size and first record offset of a cache file.

    Args:
      file_entry: the file entry object of the candidate cache file.

    Returns:
      A FIREFOX_CACHE_CONFIG named tuple holding the block size and the
      offset of the first valid record.

    Raises:
      UnableToParseFile: if the file name does not look like a Firefox cache
                         file or no valid record was found.
    """
    if not file_entry.name.startswith('_CACHE_00'):
      try:
        # Match alternative filename. Five hex characters + 'm' + two digit
        # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd'
        # instead contain data only.
        self.ALTERNATIVE_CACHE_NAME.parseString(file_entry.name)
      except pyparsing.ParseException:
        raise errors.UnableToParseFile(u'Not a Firefox cache file.')

    file_object = file_entry.GetFileObject()
    try:
      # There ought to be a valid record within the first 4MB. We use this
      # limit to prevent reading large invalid files.
      to_read = min(file_object.get_size(), self.INITIAL_CACHE_FILE_SIZE)

      while file_object.get_offset() < to_read:
        offset = file_object.get_offset()

        try:
          # We have not yet determined the block size, so we use the smallest
          # possible size.
          record = self.__NextRecord(
              file_entry.name, file_object, self.MIN_BLOCK_SIZE)
        except IOError:
          logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
              self.NAME, file_entry.name, offset))
          continue

        # The size of the first valid record determines the block size.
        record_size = (
            self.RECORD_HEADER_SIZE + record.request_size + record.info_size)

        if record_size >= 4096:
          # _CACHE_003_
          block_size = 4096
        elif record_size >= 1024:
          # _CACHE_002_
          block_size = 1024
        else:
          # _CACHE_001_
          block_size = 256

        return self.FIREFOX_CACHE_CONFIG(block_size, offset)

    finally:
      # Release the file object on every exit path: success, failure to find
      # a record and unexpected errors alike.
      file_object.close()

    raise errors.UnableToParseFile(
        u'Could not find a valid cache record. '
        u'Not a Firefox cache file.')

  def __Accept(self, candidate, block_size):
    """Determines whether the candidate is a valid cache record.

    Args:
      candidate: the parsed record header.
      block_size: the block size, in bytes, used to scan the file.

    Returns:
      True if the header has a non-empty request, a positive fetch count,
      major version 1 and spans fewer than 256 blocks, False otherwise.
    """
    record_size = (
        self.RECORD_HEADER_SIZE + candidate.request_size + candidate.info_size)

    return (
        candidate.request_size > 0 and candidate.fetch_count > 0 and
        candidate.major == 1 and record_size // block_size < 256)

  def __NextRecord(self, filename, file_object, block_size):
    """Reads the cache record at the current offset of the file object.

    On return, or when the candidate is rejected, the file object is
    positioned at the next block boundary relative to the offset it had on
    entry.

    Args:
      filename: the name of the cache file, only used in debug messages.
      file_object: the file-like object to read from.
      block_size: the block size, in bytes, used to scan the file.

    Returns:
      A FirefoxCacheEvent for the record.

    Raises:
      IOError: if the header cannot be read or is not a valid record.
    """
    offset = file_object.get_offset()

    try:
      candidate = self.RECORD_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError):
      raise IOError(u'Unable to parse stream.')

    if not self.__Accept(candidate, block_size):
      # Move reader to next candidate block.
      file_object.seek(block_size - self.RECORD_HEADER_SIZE, os.SEEK_CUR)
      raise IOError(u'Not a valid Firefox cache record.')

    # The last byte in a request is null.
    url = file_object.read(candidate.request_size)[:-1]

    # HTTP response header, even elements are keys, odd elements values.
    headers = file_object.read(candidate.info_size)

    request_method, _, _ = (
        headers.partition('request-method\x00')[2].partition('\x00'))

    _, _, response_head = headers.partition('response-head\x00')

    response_code, _, _ = response_head.partition('\r\n')

    if request_method not in self.REQUEST_METHODS:
      safe_headers = headers.decode('ascii', errors='replace')
      # The method comes straight from the file and is unknown here, so it
      # may hold arbitrary bytes; decode it before formatting the message.
      safe_method = request_method.decode('ascii', errors='replace')
      logging.debug((
          u'[{0:s}] {1:s}:{2:d}: Unknown HTTP method \'{3:s}\'. Response '
          u'headers: \'{4:s}\'').format(
              self.NAME, filename, offset, safe_method, safe_headers))

    if response_code[0:4] != 'HTTP':
      safe_headers = headers.decode('ascii', errors='replace')
      logging.debug((
          u'[{0:s}] {1:s}:{2:d}: Could not determine HTTP response code. '
          u'Response headers: \'{3:s}\'.').format(
              self.NAME, filename, offset, safe_headers))

    # A request can span multiple blocks, so we use modulo.
    _, remainder = divmod(file_object.get_offset() - offset, block_size)

    # Move reader to next candidate block. When the record ends exactly on a
    # block boundary the reader is already there and must not skip a block.
    padding = (block_size - remainder) % block_size
    if padding:
      file_object.seek(padding, os.SEEK_CUR)

    return FirefoxCacheEvent(candidate, request_method, url, response_code)

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extract records from a Firefox cache file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: if the file is not recognized as a Firefox cache
                         file.
    """
    firefox_config = self._GetFirefoxConfig(file_entry)

    # Add ourselves to the parser chain, which will be used in all subsequent
    # event creation in this parser.
    parser_chain = self._BuildParserChain(parser_chain)

    file_object = file_entry.GetFileObject()
    try:
      file_object.seek(firefox_config.first_record_offset)
      file_size = file_object.get_size()

      while file_object.get_offset() < file_size:
        # Remember where the candidate starts so that a rejected record is
        # reported at its real offset, whatever the block size is.
        offset = file_object.get_offset()

        try:
          event_object = self.__NextRecord(
              file_entry.name, file_object, firefox_config.block_size)
        except IOError:
          logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid cache record.'.format(
              self.NAME, file_entry.name, offset))
          continue

        parser_context.ProduceEvent(
            event_object, parser_chain=parser_chain, file_entry=file_entry)

    finally:
      # Close the file object even when event production fails.
      file_object.close()
# Register FirefoxCacheParser with the parsers manager. This runs as a side
# effect of importing the module.
manager.ParsersManager.RegisterParser(FirefoxCacheParser)