247 lines
8.4 KiB
Python
247 lines
8.4 KiB
Python
|
#!/usr/bin/python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
#
|
||
|
# Copyright 2014 The Plaso Project Authors.
|
||
|
# Please see the AUTHORS file for details on individual authors.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
"""Implements a parser for Firefox cache files."""
|
||
|
|
||
|
import collections
|
||
|
import logging
|
||
|
import os
|
||
|
|
||
|
import construct
|
||
|
import pyparsing
|
||
|
|
||
|
from plaso.events import time_events
|
||
|
from plaso.lib import errors
|
||
|
from plaso.lib import eventdata
|
||
|
from plaso.parsers import interface
|
||
|
from plaso.parsers import manager
|
||
|
|
||
|
|
||
|
__author__ = 'Petter Bjelland (petter.bjelland@gmail.com)'
|
||
|
|
||
|
|
||
|
class FirefoxCacheEvent(time_events.PosixTimeEvent):
  """Event object describing one record found in a Firefox cache file.

  The event timestamp is the record's last fetched time. The remaining
  record header values and the extracted request details are exposed as
  attributes on the event.
  """

  DATA_TYPE = 'firefox:cache:record'

  def __init__(self, metadata, request_method, url, response_code):
    """Initializes the event from a parsed cache record.

    Args:
      metadata: the parsed record header; an object exposing the header
                values (major, minor, location, fetch_count, last_fetched,
                last_modified, expire_time, data_size, request_size and
                info_size) as attributes.
      request_method: the request method extracted from the record.
      url: the request (URL) string extracted from the record.
      response_code: the response code string extracted from the record.
    """
    timestamp = metadata.last_fetched
    super(FirefoxCacheEvent, self).__init__(
        timestamp, eventdata.EventTimestamp.ADDED_TIME)

    # Copy the record header values verbatim onto the event.
    for attribute_name in (
        'last_modified', 'major', 'minor', 'location', 'last_fetched',
        'expire_time', 'fetch_count', 'request_size', 'info_size',
        'data_size'):
      setattr(self, attribute_name, getattr(metadata, attribute_name))

    # Values extracted from the record body by the parser.
    self.request_method = request_method
    self.url = url
    self.response_code = response_code
|
||
|
|
||
|
|
||
|
class FirefoxCacheParser(interface.BaseParser):
  """Extract cached records from Firefox.

  The parser first locates a valid record near the start of the file to
  derive the block size, then walks the file block by block and produces
  one FirefoxCacheEvent per valid record.
  """

  NAME = 'firefox_cache'
  DESCRIPTION = u'Parser for Firefox Cache files.'

  # Number of bytes allocated to a cache record metadata.
  RECORD_HEADER_SIZE = 36

  # Initial size of Firefox >= 4 cache files.
  INITIAL_CACHE_FILE_SIZE = 1024 * 1024 * 4

  # Smallest possible block size in Firefox cache files.
  MIN_BLOCK_SIZE = 256

  # Big-endian layout of the record header: 2 x 16-bit + 8 x 32-bit values,
  # which adds up to RECORD_HEADER_SIZE (36) bytes.
  RECORD_HEADER_STRUCT = construct.Struct(
      'record_header',
      construct.UBInt16('major'),
      construct.UBInt16('minor'),
      construct.UBInt32('location'),
      construct.UBInt32('fetch_count'),
      construct.UBInt32('last_fetched'),
      construct.UBInt32('last_modified'),
      construct.UBInt32('expire_time'),
      construct.UBInt32('data_size'),
      construct.UBInt32('request_size'),
      construct.UBInt32('info_size'))

  # Grammar for the alternative cache file name, e.g. '01ABCm02'.
  ALTERNATIVE_CACHE_NAME = (
      pyparsing.Word(pyparsing.hexnums, exact=5) + pyparsing.Word('m', exact=1)
      + pyparsing.Word(pyparsing.nums, exact=2))

  FIREFOX_CACHE_CONFIG = collections.namedtuple(
      u'firefox_cache_config',
      u'block_size first_record_offset')

  REQUEST_METHODS = [
      u'GET', 'HEAD', 'POST', 'PUT', 'DELETE',
      u'TRACE', 'OPTIONS', 'CONNECT', 'PATCH']

  def _GetFirefoxConfig(self, file_entry):
    """Determines the cache file block size and the first record offset.

    Args:
      file_entry: the file entry of the candidate cache file; its name and
                  file-like object are inspected.

    Returns:
      A FIREFOX_CACHE_CONFIG named tuple (block_size, first_record_offset).

    Raises:
      UnableToParseFile: if the file name does not match a known cache file
                         name or no valid record is found within the first
                         INITIAL_CACHE_FILE_SIZE bytes.
    """
    if file_entry.name[0:9] != '_CACHE_00':
      try:
        # Match alternative filename. Five hex characters + 'm' + two digit
        # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd'
        # instead contain data only.
        self.ALTERNATIVE_CACHE_NAME.parseString(file_entry.name)
      except pyparsing.ParseException:
        raise errors.UnableToParseFile(u'Not a Firefox cache file.')

    file_object = file_entry.GetFileObject()

    # The file object opened here is only used for probing, so it is always
    # released again, whether a record is found or not.
    try:
      # There ought to be a valid record within the first 4MB. We use this
      # limit to prevent reading large invalid files.
      to_read = min(file_object.get_size(), self.INITIAL_CACHE_FILE_SIZE)

      while file_object.get_offset() < to_read:
        offset = file_object.get_offset()

        try:
          # We have not yet determined the block size, so we use the
          # smallest possible size.
          record = self.__NextRecord(
              file_entry.name, file_object, self.MIN_BLOCK_SIZE)

        except IOError:
          logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
              self.NAME, file_entry.name, offset))

          # Guard against looping forever on a stream that fails without
          # advancing its offset.
          if file_object.get_offset() <= offset:
            break
          continue

        record_size = (
            self.RECORD_HEADER_SIZE + record.request_size + record.info_size)

        if record_size >= 4096:
          # _CACHE_003_
          block_size = 4096
        elif record_size >= 1024:
          # _CACHE_002_
          block_size = 1024
        else:
          # _CACHE_001_
          block_size = 256

        return self.FIREFOX_CACHE_CONFIG(block_size, offset)

    finally:
      file_object.close()

    raise errors.UnableToParseFile(
        u'Could not find a valid cache record. '
        u'Not a Firefox cache file.')

  def __Accept(self, candidate, block_size):
    """Determines whether the candidate is a valid cache record.

    Args:
      candidate: the parsed record header.
      block_size: the block size, in bytes, used for the size sanity check.

    Returns:
      True if the candidate has a non-empty request, has been fetched at
      least once, has major version 1 and spans fewer than 256 blocks;
      False otherwise.
    """
    record_size = (
        self.RECORD_HEADER_SIZE + candidate.request_size + candidate.info_size)

    return (
        candidate.request_size > 0 and candidate.fetch_count > 0 and
        candidate.major == 1 and record_size // block_size < 256)

  def __NextRecord(self, filename, file_object, block_size):
    """Reads the cache record at the current offset of the file object.

    On return, or when a candidate is rejected, the file object is
    positioned at the start of the next candidate block.

    Args:
      filename: the name of the cache file, used in debug messages only.
      file_object: the file-like object to read from.
      block_size: the block size, in bytes, of the cache file.

    Returns:
      A FirefoxCacheEvent for the record.

    Raises:
      IOError: if the record header cannot be read or the candidate is not
               a valid cache record.
    """
    offset = file_object.get_offset()

    try:
      candidate = self.RECORD_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError):
      raise IOError(u'Unable to parse stream.')

    if not self.__Accept(candidate, block_size):
      # Move reader to next candidate block.
      file_object.seek(block_size - self.RECORD_HEADER_SIZE, os.SEEK_CUR)
      raise IOError(u'Not a valid Firefox cache record.')

    # The last byte in a request is null.
    url = file_object.read(candidate.request_size)[:-1]

    # HTTP response header, even elements are keys, odd elements values.
    headers = file_object.read(candidate.info_size)

    request_method, _, _ = (
        headers.partition('request-method\x00')[2].partition('\x00'))

    _, _, response_head = headers.partition('response-head\x00')

    response_code, _, _ = response_head.partition('\r\n')

    if request_method not in self.REQUEST_METHODS:
      safe_headers = headers.decode('ascii', errors='replace')
      # The method comes straight from the file and may hold arbitrary
      # bytes; sanitize it like the headers so formatting cannot fail.
      safe_method = request_method.decode('ascii', errors='replace')
      logging.debug((
          u'[{0:s}] {1:s}:{2:d}: Unknown HTTP method \'{3:s}\'. Response '
          u'headers: \'{4:s}\'').format(
              self.NAME, filename, offset, safe_method, safe_headers))

    if response_code[0:4] != 'HTTP':
      safe_headers = headers.decode('ascii', errors='replace')
      logging.debug((
          u'[{0:s}] {1:s}:{2:d}: Could not determine HTTP response code. '
          u'Response headers: \'{3:s}\'.').format(
              self.NAME, filename, offset, safe_headers))

    # A request can span multiple blocks, so we use modulo.
    _, remainder = divmod(file_object.get_offset() - offset, block_size)

    # Move reader to the next candidate block. When the record ends exactly
    # on a block boundary the reader is already there; seeking a full block
    # further would skip the record stored in the next block.
    if remainder:
      file_object.seek(block_size - remainder, os.SEEK_CUR)

    return FirefoxCacheEvent(candidate, request_method, url, response_code)

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extract records from a Firefox cache file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: if the file is not a Firefox cache file.
    """
    firefox_config = self._GetFirefoxConfig(file_entry)

    # Add ourselves to the parser chain, which will be used in all subsequent
    # event creation in this parser.
    parser_chain = self._BuildParserChain(parser_chain)

    file_object = file_entry.GetFileObject()

    # Release the file object even if event production fails unexpectedly.
    try:
      file_object.seek(firefox_config.first_record_offset)
      file_size = file_object.get_size()

      while file_object.get_offset() < file_size:
        # Remember where this candidate starts so that the debug message
        # reports the right offset for every block size.
        record_offset = file_object.get_offset()

        try:
          event_object = self.__NextRecord(
              file_entry.name, file_object, firefox_config.block_size)

          parser_context.ProduceEvent(
              event_object, parser_chain=parser_chain, file_entry=file_entry)

        except IOError:
          logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid cache record.'.format(
              self.NAME, file_entry.name, record_offset))

          # Guard against looping forever on a stream that fails without
          # advancing its offset.
          if file_object.get_offset() <= record_offset:
            break

    finally:
      file_object.close()
|
||
|
|
||
|
|
||
|
# Register the parser with the parsers manager when this module is imported.
manager.ParsersManager.RegisterParser(FirefoxCacheParser)
|