plaso-rubanetra/plaso/parsers/firefox_cache.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements a parser for Firefox cache files."""

import collections
import logging
import os

import construct
import pyparsing

from plaso.events import time_events
from plaso.lib import errors
from plaso.lib import eventdata
from plaso.parsers import interface
from plaso.parsers import manager


__author__ = 'Petter Bjelland (petter.bjelland@gmail.com)'


class FirefoxCacheEvent(time_events.PosixTimeEvent):
  """Event object describing a single Firefox cache record.

  The event is timestamped with the record's last fetched value, using the
  ADDED_TIME timestamp description. The record header values and the parsed
  request details are exposed as attributes of the event.
  """

  DATA_TYPE = 'firefox:cache:record'

  def __init__(self, metadata, request_method, url, response_code):
    """Initializes the event object.

    Args:
      metadata: object exposing the cache record header values as attributes
                (major, minor, location, fetch_count, last_fetched,
                last_modified, expire_time, data_size, request_size and
                info_size).
      request_method: the request method extracted from the record.
      url: the URL (request key) extracted from the record.
      response_code: the first line of the response head extracted from the
                     record.
    """
    super(FirefoxCacheEvent, self).__init__(
        metadata.last_fetched, eventdata.EventTimestamp.ADDED_TIME)

    # Mirror the record header values onto the event one-to-one.
    for attribute_name in (
        'last_modified', 'major', 'minor', 'location', 'last_fetched',
        'expire_time', 'fetch_count', 'request_size', 'info_size',
        'data_size'):
      setattr(self, attribute_name, getattr(metadata, attribute_name))

    # Values parsed from the request key and the response information.
    self.request_method = request_method
    self.url = url
    self.response_code = response_code


class FirefoxCacheParser(interface.BaseParser):
  """Extract cached records from Firefox.

  The parser first scans the start of the file for a record that passes the
  validity checks, uses the size of that record to pick a block size, and
  then walks the file block by block, producing one event per valid record.
  """

  NAME = 'firefox_cache'
  DESCRIPTION = u'Parser for Firefox Cache files.'

  # Number of bytes allocated to a cache record metadata.
  RECORD_HEADER_SIZE = 36

  # Initial size of Firefox >= 4 cache files.
  INITIAL_CACHE_FILE_SIZE = 1024 * 1024 * 4

  # Smallest possible block size in Firefox cache files.
  MIN_BLOCK_SIZE = 256

  # Big-endian record header. The field sizes (2 x 16-bit + 8 x 32-bit) add up
  # to RECORD_HEADER_SIZE bytes.
  RECORD_HEADER_STRUCT = construct.Struct(
      'record_header',
      construct.UBInt16('major'),
      construct.UBInt16('minor'),
      construct.UBInt32('location'),
      construct.UBInt32('fetch_count'),
      construct.UBInt32('last_fetched'),
      construct.UBInt32('last_modified'),
      construct.UBInt32('expire_time'),
      construct.UBInt32('data_size'),
      construct.UBInt32('request_size'),
      construct.UBInt32('info_size'))

  # Alternative cache file name: five hex characters + 'm' + two digits.
  ALTERNATIVE_CACHE_NAME = (
      pyparsing.Word(pyparsing.hexnums, exact=5) + pyparsing.Word('m', exact=1)
      + pyparsing.Word(pyparsing.nums, exact=2))

  # Result type of _GetFirefoxConfig.
  FIREFOX_CACHE_CONFIG = collections.namedtuple(
      u'firefox_cache_config',
      u'block_size first_record_offset')

  # Request methods that do not trigger an "unknown method" debug message.
  REQUEST_METHODS = [
      u'GET', 'HEAD', 'POST', 'PUT', 'DELETE',
      u'TRACE', 'OPTIONS', 'CONNECT', 'PATCH']

  def _GetFirefoxConfig(self, file_entry):
    """Determines the cache file block size and the first record offset.

    Args:
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Returns:
      A FIREFOX_CACHE_CONFIG named tuple with the block size and the offset
      of the first valid record.

    Raises:
      UnableToParseFile: if the file name does not match a Firefox cache file
                         name or no valid record was found within the first
                         INITIAL_CACHE_FILE_SIZE bytes.
    """
    if not file_entry.name.startswith('_CACHE_00'):
      try:
        # Match alternative filename. Five hex characters + 'm' + two digit
        # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd'
        # instead contain data only.
        self.ALTERNATIVE_CACHE_NAME.parseString(file_entry.name)
      except pyparsing.ParseException:
        raise errors.UnableToParseFile(u'Not a Firefox cache file.')

    file_object = file_entry.GetFileObject()

    # Make sure the file object is released on every exit path, including
    # the early return and the exceptions raised below.
    try:
      # There ought to be a valid record within the first 4MB. We use this
      # limit to prevent reading large invalid files.
      to_read = min(file_object.get_size(), self.INITIAL_CACHE_FILE_SIZE)

      while file_object.get_offset() < to_read:
        offset = file_object.get_offset()

        try:
          # We have not yet determined the block size, so we use the smallest
          # possible size.
          record = self.__NextRecord(
              file_entry.name, file_object, self.MIN_BLOCK_SIZE)
        except IOError:
          logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
              self.NAME, file_entry.name, offset))

          if file_object.get_offset() <= offset:
            # The failed read did not move the reader; stop scanning instead
            # of retrying the same offset forever.
            break
          continue

        record_size = (
            self.RECORD_HEADER_SIZE + record.request_size + record.info_size)

        if record_size >= 4096:
          # _CACHE_003_
          block_size = 4096
        elif record_size >= 1024:
          # _CACHE_002_
          block_size = 1024
        else:
          # _CACHE_001_
          block_size = 256

        return self.FIREFOX_CACHE_CONFIG(block_size, offset)

    finally:
      file_object.close()

    raise errors.UnableToParseFile(
        u'Could not find a valid cache record. '
        u'Not a Firefox cache file.')

  def __Accept(self, candidate, block_size):
    """Determines whether the candidate is a valid cache record.

    Args:
      candidate: the parsed record header (as returned by
                 RECORD_HEADER_STRUCT).
      block_size: the block size, in bytes, to validate the record against.

    Returns:
      True if the candidate has a request, has been fetched at least once,
      has major version 1 and spans fewer than 256 blocks, False otherwise.
    """
    record_size = (
        self.RECORD_HEADER_SIZE + candidate.request_size +
        candidate.info_size)

    return (
        candidate.request_size > 0 and candidate.fetch_count > 0 and
        candidate.major == 1 and record_size // block_size < 256)

  def __NextRecord(self, filename, file_object, block_size):
    """Reads the next cache record and moves the reader to the next block.

    Args:
      filename: the name of the cache file, only used in debug messages.
      file_object: the file-like object to read from, positioned at the start
                   of a candidate block.
      block_size: the block size, in bytes.

    Returns:
      An event object (instance of FirefoxCacheEvent) for the record.

    Raises:
      IOError: if the record header cannot be read or the candidate is not a
               valid cache record. In the latter case the reader has been
               moved to the next candidate block.
    """
    offset = file_object.get_offset()

    try:
      candidate = self.RECORD_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError):
      raise IOError(u'Unable to parse stream.')

    if not self.__Accept(candidate, block_size):
      # Move reader to next candidate block.
      file_object.seek(block_size - self.RECORD_HEADER_SIZE, os.SEEK_CUR)
      raise IOError(u'Not a valid Firefox cache record.')

    # The last byte in a request is null.
    url = file_object.read(candidate.request_size)[:-1]

    # Response information: null-separated key and value strings.
    headers = file_object.read(candidate.info_size)

    request_method, _, _ = (
        headers.partition('request-method\x00')[2].partition('\x00'))

    _, _, response_head = headers.partition('response-head\x00')

    response_code, _, _ = response_head.partition('\r\n')

    if request_method not in self.REQUEST_METHODS:
      # Both values are raw bytes read from the file; decode them leniently
      # so that non-ASCII content cannot make the message formatting fail.
      safe_method = request_method.decode('ascii', errors='replace')
      safe_headers = headers.decode('ascii', errors='replace')
      logging.debug((
          u'[{0:s}] {1:s}:{2:d}: Unknown HTTP method \'{3:s}\'. Response '
          u'headers: \'{4:s}\'').format(
              self.NAME, filename, offset, safe_method, safe_headers))

    if response_code[0:4] != 'HTTP':
      safe_headers = headers.decode('ascii', errors='replace')
      logging.debug((
          u'[{0:s}] {1:s}:{2:d}: Could not determine HTTP response code. '
          u'Response headers: \'{3:s}\'.').format(
              self.NAME, filename, offset, safe_headers))

    # A record can span multiple blocks. Only skip the unused tail of the
    # last block: when the record ends exactly on a block boundary the
    # reader is already at the next candidate block and must not move.
    consumed = file_object.get_offset() - offset
    padding = (-consumed) % block_size
    if padding:
      file_object.seek(padding, os.SEEK_CUR)

    return FirefoxCacheEvent(candidate, request_method, url, response_code)

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extract records from a Firefox cache file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: if the file is not recognized as a Firefox cache
                         file.
    """
    firefox_config = self._GetFirefoxConfig(file_entry)

    # Add ourselves to the parser chain, which will be used in all subsequent
    # event creation in this parser.
    parser_chain = self._BuildParserChain(parser_chain)

    file_object = file_entry.GetFileObject()

    # Release the file object even when producing an event fails.
    try:
      file_object.seek(firefox_config.first_record_offset)
      file_size = file_object.get_size()

      while file_object.get_offset() < file_size:
        # Remember where this candidate starts so that a failure is reported
        # at the right offset regardless of the block size in use.
        offset = file_object.get_offset()

        try:
          event_object = self.__NextRecord(
              file_entry.name, file_object, firefox_config.block_size)

          parser_context.ProduceEvent(
              event_object, parser_chain=parser_chain, file_entry=file_entry)

        except IOError:
          logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid cache record.'.format(
              self.NAME, file_entry.name, offset))

          if file_object.get_offset() <= offset:
            # The failed read did not move the reader; stop instead of
            # retrying the same offset forever.
            break

    finally:
      file_object.close()


# Register the parser class with the parsers manager when this module is
# imported.
manager.ParsersManager.RegisterParser(FirefoxCacheParser)