Import from old repository

Stefan
2020-04-06 18:48:34 +02:00
commit 0da6783a45
762 changed files with 103065 additions and 0 deletions
@@ -0,0 +1,30 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '1.2.0'
VERSION_DEV = False
VERSION_DATE = '20141220'
def GetVersion():
"""Returns version information for plaso."""
if not VERSION_DEV:
return __version__
return u'{0:s}_{1:s}'.format(__version__, VERSION_DATE)
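As a quick illustration, here is a minimal sketch (Python 2, matching the codebase) of the two forms GetVersion() can return, assuming the plaso package above is importable:

from plaso import GetVersion

# With VERSION_DEV = False (as above) the plain version string is returned.
print(GetVersion())  # 1.2.0

# If VERSION_DEV were True, the date would be appended:
# u'{0:s}_{1:s}'.format('1.2.0', '20141220') -> u'1.2.0_20141220'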
@@ -0,0 +1,83 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Import statements for analysis plugins and common methods."""
from plaso.analysis import interface
from plaso.lib import errors
# Import statements of analysis plugins.
from plaso.analysis import browser_search
from plaso.analysis import chrome_extension
from plaso.analysis import windows_services
# TODO: move these functions to a manager class. And add a test for this
# function.
def ListAllPluginNames(show_all=True):
"""Return a list of all available plugin names and it's doc string."""
results = []
for cls_obj in interface.AnalysisPlugin.classes.itervalues():
doc_string, _, _ = cls_obj.__doc__.partition('\n')
obj = cls_obj(None)
if not show_all and cls_obj.ENABLE_IN_EXTRACTION:
results.append((obj.plugin_name, doc_string, obj.plugin_type))
elif show_all:
results.append((obj.plugin_name, doc_string, obj.plugin_type))
return sorted(results)
def LoadPlugins(plugin_names, incoming_queues, options=None):
"""Yield analysis plugins for a given list of plugin names.
Given a list of plugin names this method finds the analysis
plugins, initializes them and returns a generator.
Args:
plugin_names: A list of plugin names that should be loaded up. This
should be a list of strings.
incoming_queues: A list of queues (QueueInterface object) that the plugin
uses to read in incoming events to analyse.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
Yields:
Analysis plugin objects (instances of AnalysisPlugin).
Raises:
errors.BadConfigOption: If plugin_names does not contain a list of
strings.
"""
try:
plugin_names_lower = [word.lower() for word in plugin_names]
except AttributeError:
raise errors.BadConfigOption(u'Plugin names should be a list of strings.')
for plugin_object in interface.AnalysisPlugin.classes.itervalues():
plugin_name = plugin_object.NAME.lower()
if plugin_name in plugin_names_lower:
queue_index = plugin_names_lower.index(plugin_name)
try:
incoming_queue = incoming_queues[queue_index]
except (TypeError, IndexError):
incoming_queue = None
yield plugin_object(incoming_queue, options)
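A minimal sketch of driving LoadPlugins() with one queue per plugin name; SingleProcessQueue comes from plaso.engine.single_process as used by the tests later in this commit, and the plugin names are assumed to be registered:

from plaso import analysis
from plaso.engine import single_process

plugin_names = [u'browser_search', u'windows_services']
queues = [single_process.SingleProcessQueue() for _ in plugin_names]

# LoadPlugins() is a generator; each yielded object is an initialized
# AnalysisPlugin wired to the queue at the matching index.
for plugin in analysis.LoadPlugins(plugin_names, queues):
  print(plugin.plugin_name)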
@@ -0,0 +1,257 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A plugin that extracts browser history from events."""
import collections
import logging
import urllib
from plaso import filters
from plaso.analysis import interface
from plaso.formatters import manager as formatters_manager
from plaso.lib import event
# Create a lightweight object that is used to store timeline based information
# about each search term.
SEARCH_OBJECT = collections.namedtuple(
'SEARCH_OBJECT', 'time source engine search_term')
def ScrubLine(line):
"""Scrub the line of most obvious HTML codes.
An attempt at taking a line and swapping all instances
of %XX, which represent a character in hex, with its
unicode character.
Args:
line: The string that we are about to "fix".
Returns:
String that has its %XX hex codes swapped for text.
"""
if not line:
return ''
try:
return unicode(urllib.unquote(str(line)), 'utf-8')
except UnicodeDecodeError:
logging.warning(u'Unable to decode line: {0:s}'.format(line))
return line
class FilterClass(object):
"""A class that contains all the parser functions."""
@classmethod
def _GetBetweenQEqualsAndAmbersand(cls, string):
"""Return back string that is defined 'q=' and '&'."""
if 'q=' not in string:
return string
_, _, line = string.partition('q=')
before_and, _, _ = line.partition('&')
if not before_and:
return line
return before_and.split()[0]
@classmethod
def _SearchAndQInLine(cls, string):
"""Return a bool indicating if the words q= and search appear in string."""
return 'search' in string and 'q=' in string
@classmethod
def GoogleSearch(cls, url):
"""Return back the extracted string."""
if not cls._SearchAndQInLine(url):
return
line = cls._GetBetweenQEqualsAndAmbersand(url)
if not line:
return
return line.replace('+', ' ')
@classmethod
def YouTube(cls, url):
"""Return back the extracted string."""
return cls.GenericSearch(url)
@classmethod
def BingSearch(cls, url):
"""Return back the extracted string."""
return cls.GenericSearch(url)
@classmethod
def GenericSearch(cls, url):
"""Return back the extracted string from a generic search engine."""
if not cls._SearchAndQInLine(url):
return
return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')
@classmethod
def Yandex(cls, url):
"""Return back the results from Yandex search engine."""
if 'text=' not in url:
return
_, _, line = url.partition('text=')
before_and, _, _ = line.partition('&')
if not before_and:
return
yandex_search_url = before_and.split()[0]
return yandex_search_url.replace('+', ' ')
@classmethod
def DuckDuckGo(cls, url):
"""Return back the extracted string."""
if not 'q=' in url:
return
return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')
@classmethod
def Gmail(cls, url):
"""Return back the extracted string."""
if 'search/' not in url:
return
_, _, line = url.partition('search/')
first, _, _ = line.partition('/')
second, _, _ = first.partition('?compose')
return second.replace('+', ' ')
class AnalyzeBrowserSearchPlugin(interface.AnalysisPlugin):
"""Analyze browser search entries from events."""
NAME = 'browser_search'
# Indicate that we do not want to run this plugin during regular extraction.
ENABLE_IN_EXTRACTION = False
# Here we define filters and callback methods for all hits on each filter.
FILTERS = (
(('url iregexp "(www.|encrypted.|/)google." and url contains "search"'),
'GoogleSearch'),
('url contains "youtube.com"', 'YouTube'),
(('source is "WEBHIST" and url contains "bing.com" and url contains '
'"search"'), 'BingSearch'),
('url contains "mail.google.com"', 'Gmail'),
(('source is "WEBHIST" and url contains "yandex.com" and url contains '
'"yandsearch"'), 'Yandex'),
('url contains "duckduckgo.com"', 'DuckDuckGo')
)
# We need to implement the interface for analysis plugins, but we don't use
# command line options here, so disable checking for unused args.
# pylint: disable=unused-argument
def __init__(self, incoming_queue, options=None):
"""Initializes the browser search analysis plugin.
Args:
incoming_queue: A queue that is used to listen to incoming events.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
"""
super(AnalyzeBrowserSearchPlugin, self).__init__(incoming_queue)
self._filter_dict = {}
self._counter = collections.Counter()
# Store a list of search terms in a timeline format.
# The format is key = timestamp, value = (source, engine, search term).
self._search_term_timeline = []
for filter_str, call_back in self.FILTERS:
filter_obj = filters.GetFilter(filter_str)
call_back_obj = getattr(FilterClass, call_back, None)
if filter_obj and call_back_obj:
self._filter_dict[filter_obj] = (call_back, call_back_obj)
# pylint: enable=unused-argument
def CompileReport(self):
"""Compiles a report of the analysis.
Returns:
The analysis report (instance of AnalysisReport).
"""
report = event.AnalysisReport()
results = {}
for key, count in self._counter.iteritems():
search_engine, _, search_term = key.partition(':')
results.setdefault(search_engine, {})
results[search_engine][search_term] = count
report.report_dict = results
report.report_array = self._search_term_timeline
lines_of_text = []
for search_engine, terms in sorted(results.items()):
lines_of_text.append(u' == ENGINE: {0:s} =='.format(search_engine))
for search_term, count in sorted(
terms.iteritems(), key=lambda x: (x[1], x[0]), reverse=True):
lines_of_text.append(u'{0:d} {1:s}'.format(count, search_term))
# An empty string is added to have SetText create an empty line.
lines_of_text.append(u'')
report.SetText(lines_of_text)
return report
def ExamineEvent(
self, unused_analysis_context, event_object, **unused_kwargs):
"""Analyzes an event object.
Args:
analysis_context: An analysis context object
(instance of AnalysisContext).
event_object: An event object (instance of EventObject).
"""
# This event requires a URL attribute.
url_attribute = getattr(event_object, 'url', None)
if not url_attribute:
return
# TODO: refactor this the source should be used in formatting only.
# Check if we are dealing with a web history event.
source, _ = formatters_manager.EventFormatterManager.GetSourceStrings(
event_object)
if source != 'WEBHIST':
return
for filter_obj, call_backs in self._filter_dict.items():
call_back_name, call_back_object = call_backs
if filter_obj.Match(event_object):
returned_line = ScrubLine(call_back_object(url_attribute))
if not returned_line:
continue
self._counter[u'{0:s}:{1:s}'.format(call_back_name, returned_line)] += 1
# Add the timeline format for each search term.
self._search_term_timeline.append(SEARCH_OBJECT(
getattr(event_object, 'timestamp', 0),
getattr(event_object, 'plugin', getattr(
event_object, 'parser', u'N/A')),
call_back_name, returned_line))
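For intuition, a small sketch of the URL helpers above in isolation (Python 2); the example URL is invented for illustration:

from plaso.analysis.browser_search import FilterClass, ScrubLine

url = u'https://www.google.com/search?q=funny+cats&aq=f'

# GoogleSearch() extracts the term between 'q=' and '&' and unescapes '+'.
print(FilterClass.GoogleSearch(url))  # funny cats

# ScrubLine() swaps %XX escapes for their characters.
print(ScrubLine(u'funny%20cats'))  # funny cats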
@@ -0,0 +1,74 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the browser search analysis plugin."""
import unittest
from plaso.analysis import browser_search
from plaso.analysis import test_lib
# pylint: disable=unused-import
from plaso.formatters import chrome as chrome_formatter
from plaso.lib import event
from plaso.parsers import sqlite
from plaso.parsers.sqlite_plugins import chrome
class BrowserSearchAnalysisTest(test_lib.AnalysisPluginTestCase):
"""Tests for the browser search analysis plugin."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self._parser = sqlite.SQLiteParser()
def testAnalyzeFile(self):
"""Read a storage file that contains URL data and analyze it."""
knowledge_base = self._SetUpKnowledgeBase()
test_file = self._GetTestFilePath(['History'])
event_queue = self._ParseFile(self._parser, test_file, knowledge_base)
analysis_plugin = browser_search.AnalyzeBrowserSearchPlugin(event_queue)
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
self.assertEquals(len(analysis_reports), 1)
analysis_report = analysis_reports[0]
# Due to the behavior of the join one additional empty string at the end
# is needed to create the last empty line.
expected_text = u'\n'.join([
u' == ENGINE: GoogleSearch ==',
u'1 really really funny cats',
u'1 java plugin',
u'1 funnycats.exe',
u'1 funny cats',
u'',
u''])
self.assertEquals(analysis_report.text, expected_text)
self.assertEquals(analysis_report.plugin_name, 'browser_search')
expected_keys = set([u'GoogleSearch'])
self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys)
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,201 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A plugin that gather extension ID's from Chrome history browser."""
import logging
import re
import urllib2
from plaso.analysis import interface
from plaso.lib import event
class AnalyzeChromeExtensionPlugin(interface.AnalysisPlugin):
"""Convert Chrome extension ID's into names, requires Internet connection."""
NAME = 'chrome_extension'
# Indicate that we can run this plugin during regular extraction.
ENABLE_IN_EXTRACTION = True
_TITLE_RE = re.compile('<title>([^<]+)</title>')
_WEB_STORE_URL = u'https://chrome.google.com/webstore/detail/{xid}?hl=en-US'
# We need to implement the interface for analysis plugins, but we don't use
# command line options here, so disable checking for unused args.
# pylint: disable=unused-argument
def __init__(self, incoming_queue, options=None):
"""Initializes the Chrome extension analysis plugin.
Args:
incoming_queue: A queue that is used to listen to incoming events.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
"""
super(AnalyzeChromeExtensionPlugin, self).__init__(incoming_queue)
self._results = {}
self.plugin_type = self.TYPE_REPORT
# TODO: see if these can be moved to arguments passed to ExamineEvent
# or some kind of state object.
self._sep = None
self._user_paths = None
# Saved list of already looked up extensions.
self._extensions = {}
# pylint: enable=unused-argument
def _GetChromeWebStorePage(self, extension_id):
"""Retrieves the page for the extension from the Chrome store website.
Args:
extension_id: string containing the extension identifier.
Returns:
The HTTP response object returned by urllib2.urlopen, or None on error.
"""
web_store_url = self._WEB_STORE_URL.format(xid=extension_id)
try:
response = urllib2.urlopen(web_store_url)
except urllib2.HTTPError as exception:
logging.warning((
u'[{0:s}] unable to retrieve URL: {1:s} with error: {2:s}').format(
self.NAME, web_store_url, exception))
return
except urllib2.URLError as exception:
logging.warning((
u'[{0:s}] invalid URL: {1:s} with error: {2:s}').format(
self.NAME, web_store_url, exception))
return
return response
def _GetTitleFromChromeWebStore(self, extension_id):
"""Retrieves the name of the extension from the Chrome store website.
Args:
extension_id: string containing the extension identifier.
Returns:
The name of the extension or None if it could not be determined.
"""
# Check if we have already looked this extension up.
if extension_id in self._extensions:
return self._extensions.get(extension_id)
response = self._GetChromeWebStorePage(extension_id)
if not response:
logging.warning(
u'[{0:s}] no data returned for extension identifier: {1:s}'.format(
self.NAME, extension_id))
return
first_line = response.readline()
match = self._TITLE_RE.search(first_line)
if match:
title = match.group(1)
# Fall back to the full title so that name is always defined.
name = title
if title.startswith(u'Chrome Web Store - '):
name = title[19:]
elif title.endswith(u'- Chrome Web Store'):
name = title[:-19]
self._extensions[extension_id] = name
return name
self._extensions[extension_id] = u'Not Found'
def CompileReport(self):
"""Compiles a report of the analysis.
Returns:
The analysis report (instance of AnalysisReport).
"""
report = event.AnalysisReport()
report.report_dict = self._results
lines_of_text = []
for user, extensions in sorted(self._results.iteritems()):
lines_of_text.append(u' == USER: {0:s} =='.format(user))
for extension, extension_id in sorted(extensions):
lines_of_text.append(u' {0:s} [{1:s}]'.format(extension, extension_id))
# An empty string is added to have SetText create an empty line.
lines_of_text.append(u'')
report.SetText(lines_of_text)
return report
def ExamineEvent(self, analysis_context, event_object, **unused_kwargs):
"""Analyzes an event object.
Args:
analysis_context: An analysis context object
(instance of AnalysisContext).
event_object: An event object (instance of EventObject).
"""
# Only interested in filesystem events.
if event_object.data_type != 'fs:stat':
return
filename = getattr(event_object, 'filename', None)
if not filename:
return
# Determine if we have a Chrome extension ID.
if u'chrome' not in filename.lower():
return
if not self._sep:
self._sep = analysis_context.GetPathSegmentSeparator(filename)
if not self._user_paths:
self._user_paths = analysis_context.GetUserPaths(analysis_context.users)
if u'{0:s}Extensions{0:s}'.format(self._sep) not in filename:
return
# Now we have extension IDs, let's check if we've got the
# folder, nothing else.
paths = filename.split(self._sep)
if paths[-2] != u'Extensions':
return
extension_id = paths[-1]
if extension_id == u'Temp':
return
# Get the user and ID.
user = analysis_context.GetUsernameFromPath(
self._user_paths, filename, self._sep)
# We still want this information in here, so that we can
# manually deduce the username.
if not user:
if len(filename) > 25:
user = u'Not found ({0:s}...)'.format(filename[0:25])
else:
user = u'Not found ({0:s})'.format(filename)
extension = self._GetTitleFromChromeWebStore(extension_id)
if not extension:
extension = extension_id
self._results.setdefault(user, [])
extension_string = extension.decode('utf-8', 'ignore')
if (extension_string, extension_id) not in self._results[user]:
self._results[user].append((extension_string, extension_id))
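To make the title handling above concrete, a sketch of how a web store <title> maps to an extension name; the page line is invented for illustration:

import re

TITLE_RE = re.compile('<title>([^<]+)</title>')  # same pattern as _TITLE_RE

line = '<title>Chrome Web Store - Google Drive</title>'
title = TITLE_RE.search(line).group(1)

# len('Chrome Web Store - ') == 19, hence the title[19:] slice above.
name = title[19:]
print(name)  # Google Drive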
@@ -0,0 +1,196 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the chrome extension analysis plugin."""
import os
import unittest
from plaso.analysis import chrome_extension
from plaso.analysis import test_lib
from plaso.engine import queue
from plaso.engine import single_process
from plaso.lib import event
# We are accessing quite a lot of protected members in this test file.
# Suppressing that message test file wide.
# pylint: disable=protected-access
class AnalyzeChromeExtensionTestPlugin(
chrome_extension.AnalyzeChromeExtensionPlugin):
"""Chrome extension analysis plugin used for testing."""
NAME = 'chrome_extension_test'
_TEST_DATA_PATH = os.path.join(
os.getcwd(), u'test_data', u'chrome_extensions')
def _GetChromeWebStorePage(self, extension_id):
"""Retrieves the page for the extension from the Chrome store test data.
Args:
extension_id: string containing the extension identifier.
"""
chrome_web_store_file = os.path.join(self._TEST_DATA_PATH, extension_id)
if not os.path.exists(chrome_web_store_file):
return
return open(chrome_web_store_file, 'rb')
class ChromeExtensionTest(test_lib.AnalysisPluginTestCase):
"""Tests for the chrome extension analysis plugin."""
# Few config options here.
MAC_PATHS = [
'/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions',
('/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions/'
'apdfllckaahabafndbhieahigkjlhalf'),
'/private/var/log/system.log',
'/Users/frank/Library/Application Data/Google/Chrome/Default',
'/Users/hans/Library/Application Data/Google/Chrome/Default',
('/Users/frank/Library/Application Data/Google/Chrome/Default/'
'Extensions/pjkljhegncpnkpknbcohdijeoejaedia'),
'/Users/frank/Library/Application Data/Google/Chrome/Default/Extensions',]
WIN_PATHS = [
'C:\\Users\\Dude\\SomeFolder\\Chrome\\Default\\Extensions',
('C:\\Users\\Dude\\SomeNoneStandardFolder\\Chrome\\Default\\Extensions\\'
'hmjkmjkepdijhoojdojkdfohbdgmmhki'),
('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\'
'blpcfgokakmgnkcojhhkbfbldkacnbeo'),
'\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions',
('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\'
'icppfcnhkcmnfdhfhphakoifcfokfdhg'),
'C:\\Windows\\System32',
'\\Stuff/with path separator\\Folder']
MAC_USERS = [
{u'name': u'root', u'path': u'/var/root', u'sid': u'0'},
{u'name': u'frank', u'path': u'/Users/frank', u'sid': u'4052'},
{u'name': u'hans', u'path': u'/Users/hans', u'sid': u'4352'},
{u'name': u'dude', u'path': u'/Users/dude', u'sid': u'1123'}]
WIN_USERS = [
{u'name': u'dude', u'path': u'C:\\Users\\dude', u'sid': u'S-1'},
{u'name': u'frank', u'path': u'C:\\Users\\frank', u'sid': u'S-2'}]
def _CreateTestEventObject(self, path):
"""Create a test event object with a particular path."""
event_object = event.EventObject()
event_object.data_type = 'fs:stat'
event_object.timestamp = 12345
event_object.timestamp_desc = u'Some stuff'
event_object.filename = path
return event_object
def testMacAnalyzerPlugin(self):
"""Test the plugin against mock events."""
knowledge_base = self._SetUpKnowledgeBase(knowledge_base_values={
'users': self.MAC_USERS})
event_queue = single_process.SingleProcessQueue()
# Fill the incoming queue with events.
test_queue_producer = queue.ItemQueueProducer(event_queue)
test_queue_producer.ProduceItems([
self._CreateTestEventObject(path) for path in self.MAC_PATHS])
test_queue_producer.SignalEndOfInput()
# Initialize plugin.
analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)
# Run the analysis plugin.
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
self.assertEquals(len(analysis_reports), 1)
analysis_report = analysis_reports[0]
self.assertEquals(analysis_plugin._sep, u'/')
# Due to the behavior of the join one additional empty string at the end
# is needed to create the last empty line.
expected_text = u'\n'.join([
u' == USER: dude ==',
u' Google Drive [apdfllckaahabafndbhieahigkjlhalf]',
u'',
u' == USER: frank ==',
u' Gmail [pjkljhegncpnkpknbcohdijeoejaedia]',
u'',
u''])
self.assertEquals(analysis_report.text, expected_text)
self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test')
expected_keys = set([u'frank', u'dude'])
self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys)
def testWinAnalyzePlugin(self):
"""Test the plugin against mock events."""
knowledge_base = self._SetUpKnowledgeBase(knowledge_base_values={
'users': self.WIN_USERS})
event_queue = single_process.SingleProcessQueue()
# Fill the incoming queue with events.
test_queue_producer = queue.ItemQueueProducer(event_queue)
test_queue_producer.ProduceItems([
self._CreateTestEventObject(path) for path in self.WIN_PATHS])
test_queue_producer.SignalEndOfInput()
# Initialize plugin.
analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)
# Run the analysis plugin.
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
self.assertEquals(len(analysis_reports), 1)
analysis_report = analysis_reports[0]
self.assertEquals(analysis_plugin._sep, u'\\')
# Due to the behavior of the join one additional empty string at the end
# is needed to create the last empty line.
expected_text = u'\n'.join([
u' == USER: dude ==',
u' Google Keep - notes and lists [hmjkmjkepdijhoojdojkdfohbdgmmhki]',
u'',
u' == USER: frank ==',
u' Google Play Music [icppfcnhkcmnfdhfhphakoifcfokfdhg]',
u' YouTube [blpcfgokakmgnkcojhhkbfbldkacnbeo]',
u'',
u''])
self.assertEquals(analysis_report.text, expected_text)
self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test')
expected_keys = set([u'frank', u'dude'])
self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys)
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,168 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The analysis context object."""
class AnalysisContext(object):
"""Class that implements the analysis context."""
def __init__(self, analysis_report_queue_producer, knowledge_base):
"""Initializes a analysis plugin context object.
Args:
analysis_report_queue_producer: the analysis report queue producer
(instance of ItemQueueProducer).
knowledge_base: A knowledge base object (instance of KnowledgeBase),
which contains information from the source data needed
for analysis.
"""
super(AnalysisContext, self).__init__()
self._analysis_report_queue_producer = analysis_report_queue_producer
self._knowledge_base = knowledge_base
self.number_of_produced_analysis_reports = 0
@property
def users(self):
"""The list of users."""
return self._knowledge_base.users
def GetPathSegmentSeparator(self, path):
"""Given a path give back the path separator as a best guess.
Args:
path: the path.
Returns:
The path segment separator.
"""
if path.startswith(u'\\') or path[1:].startswith(u':\\'):
return u'\\'
if path.startswith(u'/'):
return u'/'
if u'/' in path and u'\\' in path:
# Let's count slashes and guess which one is the right one.
forward_count = len(path.split(u'/'))
backward_count = len(path.split(u'\\'))
if forward_count > backward_count:
return u'/'
else:
return u'\\'
# Now we are sure there is only one type of separator, yet
# the path does not start with one.
if u'/' in path:
return u'/'
else:
return u'\\'
def GetUsernameFromPath(self, user_paths, file_path, path_segment_separator):
"""Return a username based on preprocessing and the path.
During preprocessing the tool will gather file paths to where each user
profile is stored, and which user it belongs to. This function takes in
a path to a file and compares it to a list of all discovered usernames
and paths to their profiles in the system. If it finds that the file path
belongs to a user profile it will return the username that the profile
belongs to.
Args:
user_paths: A dictionary object containing the paths per username.
file_path: The full path to the file being analyzed.
path_segment_separator: String containing the path segment separator.
Returns:
If possible the responsible username behind the file. Otherwise None.
"""
if not user_paths:
return
if path_segment_separator != u'/':
use_path = file_path.replace(path_segment_separator, u'/')
else:
use_path = file_path
if use_path[1:].startswith(u':/'):
use_path = use_path[2:]
use_path = use_path.lower()
for user, path in user_paths.iteritems():
if use_path.startswith(path):
return user
def GetUserPaths(self, users):
"""Retrieves the user paths.
Args:
users: a list of users.
Returns:
A dictionary object containing the paths per username or None if no users.
"""
if not users:
return
user_paths = {}
user_separator = None
for user in users:
name = user.get('name')
path = user.get('path')
if not path or not name:
continue
if not user_separator:
user_separator = self.GetPathSegmentSeparator(path)
if user_separator != u'/':
path = path.replace(user_separator, u'/').replace(u'//', u'/')
if path[1:].startswith(u':/'):
path = path[2:]
name = name.lower()
user_paths[name] = path.lower()
return user_paths
def ProcessAnalysisReport(self, analysis_report, plugin_name=None):
"""Processes an analysis report before it is emitted to the queue.
Args:
analysis_report: the analysis report object (instance of AnalysisReport).
plugin_name: Optional name of the plugin. The default is None.
"""
if not getattr(analysis_report, 'plugin_name', None) and plugin_name:
analysis_report.plugin_name = plugin_name
def ProduceAnalysisReport(self, analysis_report, plugin_name=None):
"""Produces an analysis report onto the queue.
Args:
analysis_report: the analysis report object (instance of AnalysisReport).
plugin_name: Optional name of the plugin. The default is None.
"""
self.ProcessAnalysisReport(analysis_report, plugin_name=plugin_name)
self._analysis_report_queue_producer.ProduceItem(analysis_report)
self.number_of_produced_analysis_reports += 1
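A minimal sketch of the path helpers on AnalysisContext; the two constructor arguments are passed as None placeholders here because these particular methods never touch them:

from plaso.analysis import context

ctx = context.AnalysisContext(None, None)  # placeholders, see note above

print(ctx.GetPathSegmentSeparator(u'C:\\Users\\dude'))  # \
print(ctx.GetPathSegmentSeparator(u'/Users/dude'))      # /

users = [{u'name': u'dude', u'path': u'/Users/dude', u'sid': u'1123'}]
user_paths = ctx.GetUserPaths(users)  # {u'dude': u'/users/dude'}
print(ctx.GetUsernameFromPath(user_paths, u'/Users/dude/Library', u'/'))  # dude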
@@ -0,0 +1,134 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the analysis context."""
import unittest
from plaso.analysis import context
from plaso.analysis import test_lib
from plaso.engine import queue
from plaso.engine import single_process
class AnalysisContextTest(test_lib.AnalysisPluginTestCase):
"""Tests for the analysis context."""
MAC_PATHS = [
'/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions',
('/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions/'
'apdfllckaahabafndbhieahigkjlhalf'),
'/private/var/log/system.log',
'/Users/frank/Library/Application Data/Google/Chrome/Default',
'/Users/hans/Library/Application Data/Google/Chrome/Default',
('/Users/frank/Library/Application Data/Google/Chrome/Default/'
'Extensions/pjkljhegncpnkpknbcohdijeoejaedia'),
'/Users/frank/Library/Application Data/Google/Chrome/Default/Extensions',]
WIN_PATHS = [
'C:\\Users\\Dude\\SomeFolder\\Chrome\\Default\\Extensions',
('C:\\Users\\Dude\\SomeNoneStandardFolder\\Chrome\\Default\\Extensions\\'
'hmjkmjkepdijhoojdojkdfohbdgmmhki'),
('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\'
'blpcfgokakmgnkcojhhkbfbldkacnbeo'),
'\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions',
('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\'
'icppfcnhkcmnfdhfhphakoifcfokfdhg'),
'C:\\Windows\\System32',
'\\Stuff/with path separator\\Folder']
MAC_USERS = [
{u'name': u'root', u'path': u'/var/root', u'sid': u'0'},
{u'name': u'frank', u'path': u'/Users/frank', u'sid': u'4052'},
{u'name': u'hans', u'path': u'/Users/hans', u'sid': u'4352'},
{u'name': u'dude', u'path': u'/Users/dude', u'sid': u'1123'}]
WIN_USERS = [
{u'name': u'dude', u'path': u'C:\\Users\\dude', u'sid': u'S-1'},
{u'name': u'frank', u'path': u'C:\\Users\\frank', u'sid': u'S-2'}]
def setUp(self):
"""Sets up the needed objects used throughout the test."""
knowledge_base = self._SetUpKnowledgeBase()
analysis_report_queue = single_process.SingleProcessQueue()
analysis_report_queue_producer = queue.ItemQueueProducer(
analysis_report_queue)
self._analysis_context = context.AnalysisContext(
analysis_report_queue_producer, knowledge_base)
def testGetPathSegmentSeparator(self):
"""Tests the GetPathSegmentSeparator function."""
for path in self.MAC_PATHS:
path_segment_separator = self._analysis_context.GetPathSegmentSeparator(
path)
self.assertEquals(path_segment_separator, u'/')
for path in self.WIN_PATHS:
path_segment_separator = self._analysis_context.GetPathSegmentSeparator(
path)
self.assertEquals(path_segment_separator, u'\\')
def testGetUserPaths(self):
"""Tests the GetUserPaths function."""
user_paths = self._analysis_context.GetUserPaths(self.MAC_USERS)
self.assertEquals(
set(user_paths.keys()), set([u'frank', u'dude', u'hans', u'root']))
self.assertEquals(user_paths[u'frank'], u'/users/frank')
self.assertEquals(user_paths[u'dude'], u'/users/dude')
self.assertEquals(user_paths[u'hans'], u'/users/hans')
self.assertEquals(user_paths[u'root'], u'/var/root')
user_paths = self._analysis_context.GetUserPaths(self.WIN_USERS)
self.assertEquals(set(user_paths.keys()), set([u'frank', u'dude']))
self.assertEquals(user_paths[u'frank'], u'/users/frank')
self.assertEquals(user_paths[u'dude'], u'/users/dude')
def testGetUsernameFromPath(self):
"""Tests the GetUsernameFromPath function."""
user_paths = self._analysis_context.GetUserPaths(self.MAC_USERS)
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.MAC_PATHS[0], u'/')
self.assertEquals(username, u'dude')
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.MAC_PATHS[4], u'/')
self.assertEquals(username, u'hans')
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.WIN_PATHS[0], u'/')
self.assertEquals(username, None)
user_paths = self._analysis_context.GetUserPaths(self.WIN_USERS)
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.WIN_PATHS[0], u'\\')
self.assertEquals(username, u'dude')
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.WIN_PATHS[2], u'\\')
self.assertEquals(username, u'frank')
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.MAC_PATHS[2], u'\\')
self.assertEquals(username, None)
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,139 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains basic interface for analysis plugins."""
import abc
from plaso.engine import queue
from plaso.lib import registry
from plaso.lib import timelib
class AnalysisPlugin(queue.EventObjectQueueConsumer):
"""Analysis plugin gets a copy of each read event for analysis."""
__metaclass__ = registry.MetaclassRegistry
__abstract = True
# The URLS should contain a list of URLs with additional information about
# this analysis plugin.
URLS = []
# The name of the plugin. This is the name that is matched against when
# loading plugins, so it is important that this name is short, concise and
# explains the nature of the plugin easily. It also needs to be unique.
NAME = 'Plugin'
# A flag indicating whether this plugin should be run during the extraction
# phase or be reserved entirely for the post-processing stage.
# Typically this would mean that the plugin is perhaps too computationally
# heavy to be run during event extraction and should rather be run during
# post-processing.
# Since most plugins should perhaps rather be run during post-processing
# this is set to False by default and needs to be overwritten if the plugin
# should be able to run during the extraction phase.
ENABLE_IN_EXTRACTION = False
# All the possible report types.
TYPE_ANOMALY = 1 # Plugin that is inspecting events for anomalies.
TYPE_STATISTICS = 2 # Statistical calculations.
TYPE_ANNOTATION = 3 # Inspecting events with the primary purpose of
# annotating or tagging them.
TYPE_REPORT = 4 # Inspecting events to provide summary information.
# Optional arguments to be added to the argument parser.
# An example would be:
# ARGUMENTS = [('--myparameter', {
# 'action': 'store',
# 'help': 'This is my parameter help',
# 'dest': 'myparameter',
# 'default': '',
# 'type': 'unicode'})]
#
# Where all arguments into the dict object have a direct translation
# into the argparse parser.
ARGUMENTS = []
# We need to implement the interface for analysis plugins, but we don't use
# command line options here, so disable checking for unused args.
# pylint: disable=unused-argument
def __init__(self, incoming_queue, options=None):
"""Initializes an analysis plugin.
Args:
incoming_queue: A queue that is used to listen to incoming events.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
"""
super(AnalysisPlugin, self).__init__(incoming_queue)
self.plugin_type = self.TYPE_REPORT
# pylint: enable=unused-argument
def _ConsumeEventObject(self, event_object, analysis_context=None, **kwargs):
"""Consumes an event object callback for ConsumeEventObjects.
Args:
event_object: An event object (instance of EventObject).
analysis_context: Optional analysis context object (instance of
AnalysisContext). The default is None.
"""
self.ExamineEvent(analysis_context, event_object, **kwargs)
@property
def plugin_name(self):
"""Return the name of the plugin."""
return self.NAME
@abc.abstractmethod
def CompileReport(self):
"""Compiles a report of the analysis.
After the plugin has received every copy of an event to
analyze this function will be called so that the report
can be assembled.
Returns:
The analysis report (instance of AnalysisReport).
"""
@abc.abstractmethod
def ExamineEvent(self, analysis_context, event_object, **kwargs):
"""Analyzes an event object.
Args:
analysis_context: An analysis context object (instance of
AnalysisContext).
event_object: An event object (instance of EventObject).
"""
def RunPlugin(self, analysis_context):
"""For each item in the queue send the read event to analysis.
Args:
analysis_context: An analysis context object (instance of
AnalysisContext).
"""
self.ConsumeEventObjects(analysis_context=analysis_context)
analysis_report = self.CompileReport()
if analysis_report:
# TODO: move this into the plugins?
analysis_report.time_compiled = timelib.Timestamp.GetNow()
analysis_context.ProduceAnalysisReport(
analysis_report, plugin_name=self.plugin_name)
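A minimal sketch of a custom plugin on top of this interface; CountEventsPlugin is hypothetical and only fills in the two abstract methods, following the pattern of the plugins earlier in this commit:

from plaso.analysis import interface
from plaso.lib import event

class CountEventsPlugin(interface.AnalysisPlugin):
  """Counts every event it receives (illustrative only)."""

  NAME = 'count_events'

  def __init__(self, incoming_queue, options=None):
    super(CountEventsPlugin, self).__init__(incoming_queue)
    self._count = 0

  def ExamineEvent(self, analysis_context, event_object, **kwargs):
    self._count += 1

  def CompileReport(self):
    report = event.AnalysisReport()
    # The trailing empty string yields a final empty line, as in the
    # other plugins in this commit.
    report.SetText([u'{0:d} events examined'.format(self._count), u''])
    return report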
@@ -0,0 +1,171 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Analysis plugin related functions and classes for testing."""
import os
import unittest
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.analysis import context
from plaso.artifacts import knowledge_base
from plaso.engine import queue
from plaso.engine import single_process
from plaso.lib import event
from plaso.parsers import context as parsers_context
class TestAnalysisReportQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements a test analysis report queue consumer."""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(TestAnalysisReportQueueConsumer, self).__init__(queue_object)
self.analysis_reports = []
def _ConsumeItem(self, analysis_report):
"""Consumes an item callback for ConsumeItems.
Args:
analysis_report: the analysis report (instance of AnalysisReport).
"""
self.analysis_reports.append(analysis_report)
@property
def number_of_analysis_reports(self):
"""The number of analysis reports."""
return len(self.analysis_reports)
class AnalysisPluginTestCase(unittest.TestCase):
"""The unit test case for an analysis plugin."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data')
# Show full diff results, part of TestCase so does not follow our naming
# conventions.
maxDiff = None
def _GetAnalysisReportsFromQueue(self, analysis_report_queue_consumer):
"""Retrieves the analysis reports from the queue consumer.
Args:
analysis_report_queue_consumer: the analysis report queue consumer
object (instance of
TestAnalysisReportQueueConsumer).
Returns:
A list of analysis reports (instances of AnalysisReport).
"""
analysis_report_queue_consumer.ConsumeItems()
analysis_reports = []
for analysis_report in analysis_report_queue_consumer.analysis_reports:
self.assertIsInstance(analysis_report, event.AnalysisReport)
analysis_reports.append(analysis_report)
return analysis_reports
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
A path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
def _ParseFile(self, parser_object, path, knowledge_base_object):
"""Parses a file using the parser object.
Args:
parser_object: the parser object.
path: the path of the file to parse.
knowledge_base_object: the knowledge base object (instance of
KnowledgeBase).
Returns:
An event object queue object (instance of Queue).
"""
event_queue = single_process.SingleProcessQueue()
event_queue_producer = queue.ItemQueueProducer(event_queue)
parse_error_queue = single_process.SingleProcessQueue()
parser_context = parsers_context.ParserContext(
event_queue_producer, parse_error_queue, knowledge_base_object)
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=path)
file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
parser_object.Parse(parser_context, file_entry)
event_queue.SignalEndOfInput()
return event_queue
def _RunAnalysisPlugin(self, analysis_plugin, knowledge_base_object):
"""Analyzes an event object queue using the plugin object.
Args:
analysis_plugin: the analysis plugin object (instance of AnalysisPlugin).
knowledge_base_object: the knowledge base object (instance of
KnowledgeBase).
Returns:
The analysis report queue consumer (instance of
TestAnalysisReportQueueConsumer).
"""
analysis_report_queue = single_process.SingleProcessQueue()
analysis_report_queue_consumer = TestAnalysisReportQueueConsumer(
analysis_report_queue)
analysis_report_queue_producer = queue.ItemQueueProducer(
analysis_report_queue)
analysis_context = context.AnalysisContext(
analysis_report_queue_producer, knowledge_base_object)
analysis_plugin.RunPlugin(analysis_context)
analysis_report_queue.SignalEndOfInput()
return analysis_report_queue_consumer
def _SetUpKnowledgeBase(self, knowledge_base_values=None):
"""Sets up a knowledge base.
Args:
knowledge_base_values: optional dict containing the knowledge base
values. The default is None.
Returns:
A knowledge base object (instance of KnowledgeBase).
"""
knowledge_base_object = knowledge_base.KnowledgeBase()
if knowledge_base_values:
for identifier, value in knowledge_base_values.iteritems():
knowledge_base_object.SetValue(identifier, value)
return knowledge_base_object
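Condensed, the harness above runs a plugin in five steps; a comment sketch for orientation:

# 1. kb = self._SetUpKnowledgeBase()                  # KnowledgeBase, optional values
# 2. event_queue = self._ParseFile(parser, path, kb)  # queue of event objects
# 3. plugin = SomeAnalysisPlugin(event_queue)
# 4. consumer = self._RunAnalysisPlugin(plugin, kb)   # runs plugin, returns consumer
# 5. reports = self._GetAnalysisReportsFromQueue(consumer)  # [AnalysisReport, ...]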
@@ -0,0 +1,267 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A plugin to enable quick triage of Windows Services."""
from plaso.analysis import interface
from plaso.lib import event
from plaso.winnt import human_readable_service_enums
# Moving this import to the bottom due to complaints from certain versions of
# linters.
import yaml
class WindowsService(yaml.YAMLObject):
"""Class to represent a Windows Service."""
# This is used for comparison operations and defines attributes that should
# not be used during evaluation of whether two services are the same.
COMPARE_EXCLUDE = frozenset(['sources'])
KEY_PATH_SEPARATOR = u'\\'
# YAML attributes
yaml_tag = u'!WindowsService'
yaml_loader = yaml.SafeLoader
yaml_dumper = yaml.SafeDumper
def __init__(self, name, service_type, image_path, start_type, object_name,
source, service_dll=None):
"""Initializes a new Windows service object.
Args:
name: The name of the service.
service_type: The value of the Type value of the service key.
image_path: The value of the ImagePath value of the service key.
start_type: The value of the Start value of the service key.
object_name: The value of the ObjectName value of the service key.
source: A tuple of (pathspec, Registry key) describing where the
service was found.
service_dll: Optional string value of the ServiceDll value in the
service's Parameters subkey. The default is None.
Raises:
TypeError: If a tuple with two elements is not passed as the 'source'
argument.
"""
self.name = name
self.service_type = service_type
self.image_path = image_path
self.start_type = start_type
self.service_dll = service_dll
self.object_name = object_name
if isinstance(source, tuple):
if len(source) != 2:
raise TypeError(u'Source argument must be a tuple of length 2.')
# A service may be found in multiple Control Sets or Registry hives,
# hence the list.
self.sources = [source]
else:
raise TypeError(u'Source argument must be a tuple.')
self.anomalies = []
@classmethod
def FromEvent(cls, service_event):
"""Creates a Service object from an plaso event.
Args:
service_event: The event object (instance of EventObject) to create a new
Service object from.
"""
_, _, name = service_event.keyname.rpartition(
WindowsService.KEY_PATH_SEPARATOR)
service_type = service_event.regvalue.get('Type')
image_path = service_event.regvalue.get('ImagePath')
start_type = service_event.regvalue.get('Start')
service_dll = service_event.regvalue.get('ServiceDll', u'')
object_name = service_event.regvalue.get('ObjectName', u'')
if service_event.pathspec:
source = (service_event.pathspec.location, service_event.keyname)
else:
source = (u'Unknown', u'Unknown')
return cls(
name=name, service_type=service_type, image_path=image_path,
start_type=start_type, object_name=object_name,
source=source, service_dll=service_dll)
def HumanReadableType(self):
"""Return a human readable string describing the type value."""
return human_readable_service_enums.SERVICE_ENUMS['Type'].get(
self.service_type, u'{0:d}'.format(self.service_type))
def HumanReadableStartType(self):
"""Return a human readable string describing the start_type value."""
return human_readable_service_enums.SERVICE_ENUMS['Start'].get(
self.start_type, u'{0:d}'.format(self.start_type))
def __eq__(self, other_service):
"""Custom equality method so that we match near-duplicates.
Compares two service objects together and evaluates if they are
the same or close enough to be considered to represent the same service.
For two service objects to be considered the same they need to
have the same set of attributes and the same values for all their
attributes, other than those enumerated as reserved in the
COMPARE_EXCLUDE constant.
Args:
other_service: The service (instance of WindowsService) we are testing
for equality.
Returns:
A boolean value to indicate whether the services are equal.
"""
if not isinstance(other_service, WindowsService):
return False
attributes = set(self.__dict__.keys())
other_attributes = set(other_service.__dict__.keys())
if attributes != other_attributes:
return False
# We compare the values for all attributes, other than those specifically
# enumerated as not relevant for equality comparisons.
for attribute in attributes.difference(self.COMPARE_EXCLUDE):
if getattr(self, attribute, None) != getattr(
other_service, attribute, None):
return False
return True
class WindowsServiceCollection(object):
"""Class to hold and de-duplicate Windows Services."""
def __init__(self):
"""Initialize a collection that holds Windows Service."""
self._services = []
def AddService(self, new_service):
"""Add a new service to the list of ones we know about.
Args:
new_service: The service (instance of WindowsService) to add.
"""
for service in self._services:
if new_service == service:
# If this service is the same as one we already know about, we
# just want to add where it came from.
service.sources.append(new_service.sources[0])
return
# We only add a new object to our list if we don't have
# an identical one already.
self._services.append(new_service)
@property
def services(self):
"""Get the services in this collection."""
return self._services
class AnalyzeWindowsServicesPlugin(interface.AnalysisPlugin):
"""Provides a single list of for Windows services found in the Registry."""
NAME = 'windows_services'
# Indicate that we can run this plugin during regular extraction.
ENABLE_IN_EXTRACTION = True
ARGUMENTS = [
('--windows-services-output', {
'dest': 'windows-services-output',
'type': unicode,
'help': 'Specify how the results should be displayed. Options are '
'text and yaml.',
'action': 'store',
'default': u'text',
'choices': [u'text', u'yaml']}),]
def __init__(self, incoming_queue, options=None):
"""Initializes the Windows Services plugin
Args:
incoming_queue: A queue to read events from.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
"""
super(AnalyzeWindowsServicesPlugin, self).__init__(incoming_queue)
self._service_collection = WindowsServiceCollection()
self.plugin_type = interface.AnalysisPlugin.TYPE_REPORT
self._output_mode = getattr(options, 'windows-services-output', u'text')
def ExamineEvent(self, analysis_context, event_object, **kwargs):
"""Analyzes an event_object and creates Windows Services as required.
At present, this method only handles events extracted from the Registry.
Args:
analysis_context: The context object analysis plugins.
event_object: The event object (instance of EventObject) to examine.
"""
# TODO: Handle event log entries here also (ie, event id 4697).
if getattr(event_object, 'data_type', None) != 'windows:registry:service':
return
# Create and store the service.
service = WindowsService.FromEvent(event_object)
self._service_collection.AddService(service)
def _FormatServiceText(self, service):
"""Produces a human readable multi-line string representing the service.
Args:
service: The service (instance of WindowsService) to format.
"""
string_segments = [
service.name,
u'\tImage Path = {0:s}'.format(service.image_path),
u'\tService Type = {0:s}'.format(service.HumanReadableType()),
u'\tStart Type = {0:s}'.format(service.HumanReadableStartType()),
u'\tService Dll = {0:s}'.format(service.service_dll),
u'\tObject Name = {0:s}'.format(service.object_name),
u'\tSources:']
for source in service.sources:
string_segments.append(u'\t\t{0:s}:{1:s}'.format(source[0], source[1]))
return u'\n'.join(string_segments)
def CompileReport(self):
"""Compiles a report of the analysis.
Returns:
The analysis report (instance of AnalysisReport).
"""
report = event.AnalysisReport()
if self._output_mode == 'yaml':
lines_of_text = []
lines_of_text.append(
yaml.safe_dump_all(self._service_collection.services))
else:
lines_of_text = [u'Listing Windows Services']
for service in self._service_collection.services:
lines_of_text.append(self._FormatServiceText(service))
# Separate services with a blank line.
lines_of_text.append(u'')
report.SetText(lines_of_text)
return report
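A sketch of the de-duplication semantics above: two WindowsService objects that differ only in their source merge into a single entry whose sources list grows (the values mirror the test data that follows):

from plaso.analysis.windows_services import (
    WindowsService, WindowsServiceCollection)

kwargs = dict(
    name=u'TestbDriver', service_type=2,
    image_path=u'C:\\Dell\\testdriver.sys', start_type=2, object_name=u'')
service_a = WindowsService(
    source=(u'SYSTEM', u'\\ControlSet001\\services\\TestbDriver'), **kwargs)
service_b = WindowsService(
    source=(u'SYSTEM', u'\\ControlSet003\\services\\TestbDriver'), **kwargs)

collection = WindowsServiceCollection()
collection.AddService(service_a)
collection.AddService(service_b)

print(len(collection.services))             # 1 (de-duplicated)
print(len(collection.services[0].sources))  # 2 (both Control Sets recorded)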
@@ -0,0 +1,192 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the windows services analysis plugin."""
import argparse
import unittest
from dfvfs.path import fake_path_spec
from plaso.analysis import test_lib
from plaso.analysis import windows_services
from plaso.engine import queue
from plaso.engine import single_process
from plaso.events import windows_events
from plaso.parsers import winreg
class WindowsServicesTest(test_lib.AnalysisPluginTestCase):
"""Tests for the Windows Services analysis plugin."""
SERVICE_EVENTS = [
{u'path': u'\\ControlSet001\\services\\TestbDriver',
u'text_dict': {u'ImagePath': u'C:\\Dell\\testdriver.sys', u'Type': 2,
u'Start': 2, u'ObjectName': u''},
u'timestamp': 1346145829002031},
# This is almost the same, but different timestamp and source, so that
# we can test the service de-duplication.
{u'path': u'\\ControlSet003\\services\\TestbDriver',
u'text_dict': {u'ImagePath': u'C:\\Dell\\testdriver.sys', u'Type': 2,
u'Start': 2, u'ObjectName': u''},
u'timestamp': 1346145839002031},
]
def _CreateAnalysisPlugin(self, input_queue, output_mode):
"""Create an analysis plugin to test with.
Args:
input_queue: A queue the plugin will read events from.
output_mode: The output format the plugin will use.
Valid options are 'text' and 'yaml'.
Returns:
An instance of AnalyzeWindowsServicesPlugin.
"""
argument_parser = argparse.ArgumentParser()
plugin_args = windows_services.AnalyzeWindowsServicesPlugin.ARGUMENTS
for parameter, config in plugin_args:
argument_parser.add_argument(parameter, **config)
arguments = ['--windows-services-output', output_mode]
options = argument_parser.parse_args(arguments)
analysis_plugin = windows_services.AnalyzeWindowsServicesPlugin(
input_queue, options)
return analysis_plugin
def _CreateTestEventObject(self, service_event):
"""Create a test event object with a particular path.
Args:
service_event: A dict containing attributes of an event to add to the
queue.
Returns:
An EventObject representing the service to be created.
"""
test_pathspec = fake_path_spec.FakePathSpec(
location=u'C:\\WINDOWS\\system32\\SYSTEM')
event_object = windows_events.WindowsRegistryServiceEvent(
service_event[u'timestamp'], service_event[u'path'],
service_event[u'text_dict'])
event_object.pathspec = test_pathspec
return event_object
def testSyntheticKeysText(self):
"""Test the plugin against mock events."""
event_queue = single_process.SingleProcessQueue()
# Fill the incoming queue with events.
test_queue_producer = queue.ItemQueueProducer(event_queue)
events = [self._CreateTestEventObject(service_event)
for service_event
in self.SERVICE_EVENTS]
test_queue_producer.ProduceItems(events)
test_queue_producer.SignalEndOfInput()
# Initialize plugin.
analysis_plugin = self._CreateAnalysisPlugin(event_queue, u'text')
# Run the analysis plugin.
knowledge_base = self._SetUpKnowledgeBase()
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
self.assertEquals(len(analysis_reports), 1)
analysis_report = analysis_reports[0]
expected_text = (
u'Listing Windows Services\n'
u'TestbDriver\n'
u'\tImage Path = C:\\Dell\\testdriver.sys\n'
u'\tService Type = File System Driver (0x2)\n'
u'\tStart Type = Auto Start (2)\n'
u'\tService Dll = \n'
u'\tObject Name = \n'
u'\tSources:\n'
u'\t\tC:\\WINDOWS\\system32\\SYSTEM:'
u'\\ControlSet001\\services\\TestbDriver\n'
u'\t\tC:\\WINDOWS\\system32\\SYSTEM:'
u'\\ControlSet003\\services\\TestbDriver\n\n')
self.assertEquals(expected_text, analysis_report.text)
self.assertEquals(analysis_report.plugin_name, 'windows_services')
def testRealEvents(self):
"""Test the plugin with text output against real events from the parser."""
parser = winreg.WinRegistryParser()
# We could remove the non-Services plugins, but testing shows that the
# performance gain is negligible.
knowledge_base = self._SetUpKnowledgeBase()
test_path = self._GetTestFilePath(['SYSTEM'])
event_queue = self._ParseFile(parser, test_path, knowledge_base)
# Run the analysis plugin.
analysis_plugin = self._CreateAnalysisPlugin(event_queue, u'text')
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
report = analysis_reports[0]
text = report.text
# We'll check that a few strings are in the report, like they're supposed
# to be, rather than checking for the exact content of the string,
# as that's dependent on the full path to the test files.
test_strings = [u'1394ohci', u'WwanSvc', u'Sources:', u'ControlSet001',
u'ControlSet002']
for string in test_strings:
self.assertTrue(string in text)
def testRealEventsYAML(self):
"""Test the plugin with YAML output against real events from the parser."""
parser = winreg.WinRegistryParser()
# We could remove the non-Services plugins, but testing shows that the
# performance gain is negligible.
knowledge_base = self._SetUpKnowledgeBase()
test_path = self._GetTestFilePath(['SYSTEM'])
event_queue = self._ParseFile(parser, test_path, knowledge_base)
# Run the analysis plugin.
analysis_plugin = self._CreateAnalysisPlugin(event_queue, 'yaml')
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
report = analysis_reports[0]
text = report.text
# We'll check that a few strings are in the report, like they're supposed
# to be, rather than checking for the exact content of the string,
# as that's dependent on the full path to the test files.
test_strings = [windows_services.WindowsService.yaml_tag, u'1394ohci',
u'WwanSvc', u'ControlSet001', u'ControlSet002']
for string in test_strings:
self.assertTrue(string in text, u'{0:s} not found in report text'.format(
string))
if __name__ == '__main__':
unittest.main()
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+137
View File
@@ -0,0 +1,137 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The artifact knowledge base object.
The knowledge base is filled by user provided input and the pre-processing
phase. It is intended to provide successive phases, like the parsing and
analysis phases, with essential information like e.g. the timezone and
codepage of the source data.
"""
import pytz

from plaso.lib import event
class KnowledgeBase(object):
"""Class that implements the artifact knowledge base."""
def __init__(self):
"""Initialize the knowledge base object."""
super(KnowledgeBase, self).__init__()
# TODO: the first versions of the knowledge base will wrap the pre-process
# object, but this should be replaced by an artifact style knowledge base
# or artifact cache.
self._pre_obj = event.PreprocessObject()
self._default_codepage = u'cp1252'
self._default_timezone = pytz.timezone('UTC')
@property
def pre_obj(self):
"""The pre-process object."""
return self._pre_obj
@property
def codepage(self):
"""The codepage."""
return getattr(self._pre_obj, 'codepage', self._default_codepage)
@property
def hostname(self):
"""The hostname."""
return getattr(self._pre_obj, 'hostname', u'')
@property
def platform(self):
"""The platform."""
return getattr(self._pre_obj, 'guessed_os', u'')
@platform.setter
  def platform(self, value):
    """Sets the platform."""
    setattr(self._pre_obj, 'guessed_os', value)
@property
def timezone(self):
"""The timezone object."""
return getattr(self._pre_obj, 'zone', self._default_timezone)
@property
def users(self):
"""The list of users."""
return getattr(self._pre_obj, 'users', [])
@property
def year(self):
"""The year."""
return getattr(self._pre_obj, 'year', 0)
def GetUsernameByIdentifier(self, identifier):
"""Retrieves the username based on an identifier.
Args:
identifier: the identifier, either a UID or SID.
Returns:
      The username or '-' if not available.
"""
if not identifier:
return u'-'
return self._pre_obj.GetUsernameById(identifier)
def GetValue(self, identifier, default_value=None):
"""Retrieves a value by identifier.
Args:
identifier: the value identifier.
default_value: optional default value. The default is None.
Returns:
      The value or the default value if not available.
"""
return getattr(self._pre_obj, identifier, default_value)
def SetDefaultCodepage(self, codepage):
"""Sets the default codepage.
Args:
codepage: the default codepage.
"""
# TODO: check if value is sane.
self._default_codepage = codepage
def SetDefaultTimezone(self, timezone):
"""Sets the default timezone.
Args:
timezone: the default timezone.
"""
# TODO: check if value is sane.
self._default_timezone = timezone
def SetValue(self, identifier, value):
"""Sets a value by identifier.
Args:
identifier: the value identifier.
value: the value.
"""
setattr(self._pre_obj, identifier, value)
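# A minimal usage sketch of the knowledge base, assuming this module is
# importable as plaso.engine.knowledge_base; the values set here are purely
# illustrative.
from plaso.engine import knowledge_base

kb = knowledge_base.KnowledgeBase()
print kb.codepage                    # u'cp1252' until pre-processing sets one.
kb.SetValue(u'codepage', u'cp850')
print kb.GetValue(u'codepage')       # u'cp850'
print kb.GetValue(u'bogus', default_value=u'fallback')   # u'fallback'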
+16
View File
@@ -0,0 +1,16 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+184
View File
@@ -0,0 +1,184 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the format classifier classes.
Plaso is a tool that extracts events from files on a file system.
For this it either reads files from a mounted file system or from an image.
It uses an exhaustive approach to determine which parser can extract events
from a file, meaning that it passes the file first to parser A and if that
fails it continues with parser B.
The classifier is designed to more quickly determine the format of a file and
thereby limit the number of parsers needed in the exhaustive approach.
The current version of the classifier uses signatures to identify file formats.
Some signatures must always be found at a specific offset; these are referred
to as offset-bound signatures, or bound for short. Other signatures are
commonly found at a specific offset, but not necessarily. The last form of
signature is unbound, meaning that it has no fixed or common location where
it can be found.
A specification is a collection of signatures with additional metadata that
defines a specific file format. These specifications are grouped into a store
for ease of use, e.g. so that they can be read from a configuration file all
at once.
The classifier requires a scanner to analyze the data in a file. The scanner
uses the specifications in a store to scan for the signatures of a certain
format.
The classifier allows for multiple methods of scanning a file:
* full: the entire file is scanned. This is the default scanning method.
 * head-tail: only the beginning (head) and the end (tail) of the file are
   scanned. This approach is more efficient for larger files.
The buffer size is used as the size of the data that is scanned.
Smaller files are scanned entirely.
The classifier returns zero or more classifications which point to a format
specification and the scan results for the signatures defined by
the specification.
"""
import logging
class Classification(object):
"""This class represents a format classification.
The format classification consists of a format specification and
scan results.
"""
def __init__(self, specification, scan_matches):
"""Initializes the classification.
Args:
specification: the format specification (instance of Specification).
scan_matches: the list of scan matches (instances of _ScanMatch).
"""
self._specification = specification
self.scan_matches = scan_matches
@property
def identifier(self):
"""The classification type."""
return self._specification.identifier
@property
def magic_types(self):
"""The magic types or an empty list if none."""
return self._specification.magic_types
@property
def mime_types(self):
"""The mime type or an empty list if none."""
return self._specification.mime_types
class Classifier(object):
"""Class for classifying formats in raw data.
The classifier is initialized with one or more specifications.
After which it can be used to classify data in files or file-like objects.
The actual scanning of the data is done by the scanner, these are separate
to allow for the scanner to easily be replaced for a more efficient
alternative if necessary.
For an example of how the classifier is to be used see: classify.py.
"""
BUFFER_SIZE = 16 * 1024 * 1024
def __init__(self, scanner):
"""Initializes the classifier and sets up the scanning related structures.
Args:
scanner: an instance of the signature scanner.
"""
self._scanner = scanner
def _GetClassifications(self, scan_results):
"""Retrieves the classifications based on the scan results.
Multiple scan results are combined into a single classification.
Args:
scan_results: a list containing instances of _ScanResult.
Returns:
a list of instances of Classification.
"""
classifications = {}
for scan_result in scan_results:
for scan_match in scan_result.scan_matches:
logging.debug(
u'scan match at offset: 0x{0:08x} specification: {1:s}'.format(
scan_match.total_data_offset, scan_result.identifier))
if scan_result.identifier not in classifications:
classifications[scan_result.identifier] = Classification(
scan_result.specification, scan_result.scan_matches)
return classifications.values()
def ClassifyBuffer(self, data, data_size):
"""Classifies the data in a buffer, assumes all necessary data is available.
Args:
data: a buffer containing raw data.
data_size: the size of the raw data in the buffer.
Returns:
a list of classifications or an empty list.
"""
scan_state = self._scanner.StartScan()
self._scanner.ScanBuffer(scan_state, data, data_size)
self._scanner.StopScan(scan_state)
return self._GetClassifications(scan_state.GetResults())
def ClassifyFileObject(self, file_object):
"""Classifies the data in a file-like object.
Args:
file_object: a file-like object.
Returns:
a list of classifier classifications or an empty list.
"""
scan_results = self._scanner.ScanFileObject(file_object)
return self._GetClassifications(scan_results)
def ClassifyFile(self, filename):
"""Classifies the data in a file.
Args:
filename: the name of the file.
Returns:
a list of classifier classifications or an empty list.
"""
classifications = []
with open(filename, 'rb') as file_object:
classifications = self.ClassifyFileObject(file_object)
return classifications
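# A minimal sketch of classifying an in-memory buffer, assuming the
# test_lib.CreateSpecificationStore helper used by the tests in this import;
# whether the hypothetical buffer below matches depends on the specifications
# in that store.
from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib

test_scanner = scanner.Scanner(test_lib.CreateSpecificationStore())
test_classifier = classifier.Classifier(test_scanner)

data = 'regf' + '\x00' * 28
for classification in test_classifier.ClassifyBuffer(data, len(data)):
  print classification.identifier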
+72
View File
@@ -0,0 +1,72 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the format classifier classes."""
import os
import unittest
from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib
class ClassifierTest(unittest.TestCase):
"""Class to test Classifier."""
def setUp(self):
"""Function to test the initialize function."""
self._store = test_lib.CreateSpecificationStore()
self._test_file1 = os.path.join('test_data', 'NTUSER.DAT')
self._test_file2 = os.path.join('test_data', 'syslog.zip')
def testClassifyFileWithScanner(self):
"""Function to test the classify file function."""
test_scanner = scanner.Scanner(self._store)
test_classifier = classifier.Classifier(test_scanner)
classifications = test_classifier.ClassifyFile(self._test_file1)
self.assertEqual(len(classifications), 1)
# TODO: assert the contents of the classification.
test_classifier = classifier.Classifier(test_scanner)
classifications = test_classifier.ClassifyFile(self._test_file2)
self.assertEqual(len(classifications), 1)
# TODO: assert the contents of the classification.
def testClassifyFileWithOffsetBoundScanner(self):
"""Function to test the classify file function."""
test_scanner = scanner.OffsetBoundScanner(self._store)
test_classifier = classifier.Classifier(test_scanner)
classifications = test_classifier.ClassifyFile(self._test_file1)
self.assertEqual(len(classifications), 1)
# TODO: assert the contents of the classification.
test_classifier = classifier.Classifier(test_scanner)
classifications = test_classifier.ClassifyFile(self._test_file2)
self.assertEqual(len(classifications), 1)
# TODO: assert the contents of the classification.
if __name__ == "__main__":
unittest.main()
+78
View File
@@ -0,0 +1,78 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a small classify test program."""
import argparse
import glob
import logging
from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib
def Main():
args_parser = argparse.ArgumentParser(
description='Classify test program.')
  # Note: argparse, unlike optparse, has no type='choice'; the choices
  # argument alone restricts the accepted values.
  args_parser.add_argument(
      '-t', '--type', metavar='TYPE', action='store', dest='scanner_type',
      choices=['scan-tree', 'scan_tree'], default='scan-tree',
      help='The scanner type')
args_parser.add_argument(
'-v', '--verbose', action='store_true', dest='verbose', default=False,
help='Print verbose output')
args_parser.add_argument(
'filenames', nargs='+', action='store', metavar='FILENAMES',
default=None, help='The input filename(s) to classify.')
options = args_parser.parse_args()
if options.verbose:
logging.basicConfig(level=logging.DEBUG)
files_to_classify = []
for input_glob in options.filenames:
files_to_classify += glob.glob(input_glob)
store = test_lib.CreateSpecificationStore()
if options.scanner_type not in ['scan-tree', 'scan_tree']:
    print u'Unsupported scanner type, defaulting to: scan-tree.'
scan = scanner.Scanner(store)
classify = classifier.Classifier(scan)
for input_filename in files_to_classify:
classifications = classify.ClassifyFile(input_filename)
print u'File: {0:s}'.format(input_filename)
if not classifications:
print u'No classifications found.'
else:
print u'Classifications:'
for classification in classifications:
print u'\tformat: {0:s}'.format(classification.identifier)
print u''
if __name__ == '__main__':
Main()
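# Example invocations of this test program (hypothetical file names):
#   python classify.py test_data/NTUSER.DAT
#   python classify.py --verbose 'test_data/*.zip'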
+308
View File
@@ -0,0 +1,308 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The patterns classes used by the scan tree-based format scanner."""
class _ByteValuePatterns(object):
"""Class that implements a mapping between byte value and patterns.
The byte value patterns are used in the scan tree-based format scanner
to map a byte value to one or more patterns.
"""
def __init__(self, byte_value):
"""Initializes the pattern table (entry) byte value.
Args:
byte_value: the byte value that maps the patterns in the table.
"""
super(_ByteValuePatterns, self).__init__()
self.byte_value = byte_value
self.patterns = {}
def __unicode__(self):
"""Retrieves a string representation of the byte value patterns."""
return u'0x{0:02x} {1!s}'.format(ord(self.byte_value), self.patterns)
def AddPattern(self, pattern):
"""Adds a pattern.
Args:
pattern: the pattern (instance of Pattern).
Raises:
ValueError: if the table entry already contains a pattern
with the same identifier.
"""
if pattern.identifier in self.patterns:
raise ValueError(u'Pattern {0:s} is already defined.'.format(
pattern.identifier))
self.patterns[pattern.identifier] = pattern
def ToDebugString(self, indentation_level=1):
"""Converts the byte value pattern into a debug string."""
indentation = u' ' * indentation_level
header = u'{0:s}byte value: 0x{1:02x}\n'.format(
indentation, ord(self.byte_value))
entries = u''.join([u'{0:s} patterns: {1:s}\n'.format(
indentation, identifier) for identifier in self.patterns])
return u''.join([header, entries, u'\n'])
class _SkipTable(object):
"""Class that implements a skip table.
The skip table is used in the scan tree-based format scanner to determine
the skip value for the BoyerMooreHorspool search.
"""
def __init__(self, skip_pattern_length):
"""Initializes the skip table.
Args:
skip_pattern_length: the (maximum) skip pattern length.
"""
super(_SkipTable, self).__init__()
self._skip_value_per_byte_value = {}
self.skip_pattern_length = skip_pattern_length
def __getitem__(self, key):
"""Retrieves a specific skip value.
Args:
key: the byte value within the skip table.
Returns:
      the skip value for the key or the maximum skip value
if no corresponding key was found.
"""
if key in self._skip_value_per_byte_value:
return self._skip_value_per_byte_value[key]
return self.skip_pattern_length
def SetSkipValue(self, byte_value, skip_value):
"""Sets a skip value.
Args:
byte_value: the corresponding byte value.
skip_value: the number of bytes to skip.
Raises:
ValueError: if byte value or skip value is out of bounds.
"""
if byte_value < 0 or byte_value > 255:
raise ValueError(u'Invalid byte value, value out of bounds.')
if skip_value < 0 or skip_value >= self.skip_pattern_length:
raise ValueError(u'Invalid skip value, value out of bounds.')
if (not byte_value in self._skip_value_per_byte_value or
self._skip_value_per_byte_value[byte_value] > skip_value):
self._skip_value_per_byte_value[byte_value] = skip_value
def ToDebugString(self):
"""Converts the skip table into a debug string."""
header = u'Byte value\tSkip value\n'
entries = u''.join([u'0x{0:02x}\t{1:d}\n'.format(
byte_value, self._skip_value_per_byte_value[byte_value])
for byte_value in self._skip_value_per_byte_value])
default = u'Default\t{0:d}\n'.format(self.skip_pattern_length)
return u''.join([header, entries, default, u'\n'])
class Pattern(object):
"""Class that implements a pattern."""
def __init__(self, signature_index, signature, specification):
"""Initializes the pattern.
Args:
signature_index: the index of the signature within the specification.
signature: the signature (instance of Signature).
specification: the specification (instance of Specification) that
contains the signature.
"""
super(Pattern, self).__init__()
self._signature_index = signature_index
self.signature = signature
self.specification = specification
def __unicode__(self):
"""Retrieves a string representation."""
return self.identifier
@property
def expression(self):
"""The signature expression."""
return self.signature.expression
@property
def identifier(self):
"""The identifier."""
    # Using _ here because some scanner implementations are limited in what
    # characters can be used in the identifiers.
return u'{0:s}_{1:d}'.format(
self.specification.identifier, self._signature_index)
@property
def offset(self):
"""The signature offset."""
return self.signature.offset
@property
def is_bound(self):
"""Boolean value to indicate the signature is bound to an offset."""
return self.signature.is_bound
class PatternTable(object):
"""Class that implements a pattern table.
  The pattern table is used in the scan tree-based format scanner
to construct a scan tree. It contains either unbound patterns or
patterns bound to a specific offset.
"""
def __init__(self, patterns, ignore_list, is_bound=None):
"""Initializes and builds the patterns table from patterns.
Args:
patterns: a list of the patterns.
ignore_list: a list of pattern offsets to ignore.
is_bound: optional boolean value to indicate if the signatures are bound
to offsets. The default is None, which means the value should
be ignored and both bound and unbound patterns are considered
unbound.
Raises:
ValueError: if a signature pattern is too small to be useful (< 4).
"""
super(PatternTable, self).__init__()
self._byte_values_per_offset = {}
self.largest_pattern_length = 0
self.largest_pattern_offset = 0
self.patterns = []
self.smallest_pattern_length = 0
self.smallest_pattern_offset = 0
for pattern in patterns:
if is_bound is not None and pattern.signature.is_bound != is_bound:
continue
pattern_length = len(pattern.expression)
if pattern_length < 4:
raise ValueError(u'Pattern too small to be useful.')
      # The smallest pattern length starts at 0, which would win every min()
      # comparison, so treat 0 as "not yet set".
      if not self.smallest_pattern_length:
        self.smallest_pattern_length = pattern_length
      else:
        self.smallest_pattern_length = min(
            self.smallest_pattern_length, pattern_length)
self.largest_pattern_length = max(
self.largest_pattern_length, pattern_length)
self.patterns.append(pattern)
self._AddPattern(pattern, ignore_list, is_bound)
def _AddPattern(self, pattern, ignore_list, is_bound):
"""Adds the byte values per offset in the pattern to the table.
Args:
pattern: the pattern (instance of Pattern).
ignore_list: a list of pattern offsets to ignore.
is_bound: boolean value to indicate if the signatures are bound
to offsets. A value of None indicates that the value should
be ignored and both bound and unbound patterns are considered
unbound.
"""
pattern_offset = pattern.offset if is_bound else 0
self.smallest_pattern_offset = min(
self.smallest_pattern_offset, pattern_offset)
self.largest_pattern_offset = max(
self.largest_pattern_offset, pattern_offset)
for byte_value in pattern.expression:
if pattern_offset not in self._byte_values_per_offset:
self._byte_values_per_offset[pattern_offset] = {}
if pattern_offset not in ignore_list:
byte_values = self._byte_values_per_offset[pattern_offset]
if byte_value not in byte_values:
byte_values[byte_value] = _ByteValuePatterns(byte_value)
byte_value_patterns = byte_values[byte_value]
byte_value_patterns.AddPattern(pattern)
pattern_offset += 1
@property
def offsets(self):
"""The offsets."""
return self._byte_values_per_offset.keys()
def GetByteValues(self, pattern_offset):
"""Returns the bytes values for a specific pattern offset."""
return self._byte_values_per_offset[pattern_offset]
def GetSkipTable(self):
"""Retrieves the skip table for the patterns in the table.
Returns:
The skip table (instance of SkipTable).
"""
skip_table = _SkipTable(self.smallest_pattern_length)
for pattern in self.patterns:
if pattern.expression:
skip_value = self.smallest_pattern_length
for expression_index in range(0, self.smallest_pattern_length):
skip_value -= 1
skip_table.SetSkipValue(
ord(pattern.expression[expression_index]), skip_value)
return skip_table
def ToDebugString(self):
"""Converts the pattern table into a debug string."""
header = u'Pattern offset\tByte value(s)\n'
entries = u''
for pattern_offset in self._byte_values_per_offset:
entries += u'{0:d}'.format(pattern_offset)
byte_values = self._byte_values_per_offset[pattern_offset]
for byte_value in byte_values:
identifiers = u', '.join(
[identifier for identifier in byte_values[byte_value].patterns])
entries += u'\t0x{0:02x} ({1:s})'.format(ord(byte_value), identifiers)
entries += u'\n'
return u''.join([header, entries, u'\n'])
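# A minimal sketch of building a pattern table and its Boyer-Moore-Horspool
# skip table, wiring up Specification and Signature the same way the scan
# tree tests in this import do.
from plaso.classifier import patterns
from plaso.classifier import specification

regf_specification = specification.Specification('REGF')
regf_specification.AddNewSignature('regf', offset=0)
regf_signature = specification.Signature('regf', offset=0)
regf_pattern = patterns.Pattern(0, regf_signature, regf_specification)

pattern_table = patterns.PatternTable([regf_pattern], [])
print pattern_table.ToDebugString()
print pattern_table.GetSkipTable().ToDebugString()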
+156
View File
@@ -0,0 +1,156 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The range list data type."""
class Range(object):
"""Class that implements a range object."""
def __init__(self, range_offset, range_size):
"""Initializes the range object.
Args:
range_offset: the range offset.
range_size: the range size.
Raises:
ValueError: if the range offset or range size is not valid.
"""
if range_offset < 0:
raise ValueError(u'Invalid range offset value.')
if range_size < 0:
raise ValueError(u'Invalid range size value.')
super(Range, self).__init__()
self.start_offset = range_offset
self.size = range_size
self.end_offset = range_offset + range_size
class RangeList(object):
"""Class that implements a range list object."""
def __init__(self):
"""Initializes the range list object."""
super(RangeList, self).__init__()
self.ranges = []
@property
def number_of_ranges(self):
"""The number of ranges."""
return len(self.ranges)
def GetSpanningRange(self):
"""Retrieves the range spanning the entire range list."""
if self.number_of_ranges == 0:
return
first_range = self.ranges[0]
last_range = self.ranges[-1]
range_size = last_range.end_offset - first_range.start_offset
return Range(first_range.start_offset, range_size)
def Insert(self, range_offset, range_size):
"""Inserts the range defined by the offset and size in the list.
Note that overlapping ranges will be merged.
Args:
range_offset: the range offset.
range_size: the range size.
Raises:
RuntimeError: if the range cannot be inserted.
ValueError: if the range offset or range size is not valid.
"""
if range_offset < 0:
raise ValueError(u'Invalid range offset value.')
if range_size < 0:
raise ValueError(u'Invalid range size value.')
insert_index = None
merge_index = None
number_of_range_objects = len(self.ranges)
range_end_offset = range_offset + range_size
if number_of_range_objects == 0:
insert_index = 0
else:
range_object_index = 0
for range_object in self.ranges:
# Ignore negative ranges.
if range_object.start_offset < 0:
range_object_index += 1
continue
# Insert the range before an existing one.
if range_end_offset < range_object.start_offset:
insert_index = range_object_index
break
# Ignore the range since the existing one overlaps it.
if (range_offset >= range_object.start_offset and
range_end_offset <= range_object.end_offset):
break
# Merge the range since it overlaps the existing one at the end.
if (range_offset >= range_object.start_offset and
range_offset <= range_object.end_offset):
merge_index = range_object_index
break
# Merge the range since it overlaps the existing one at the start.
if (range_end_offset >= range_object.start_offset and
range_end_offset <= range_object.end_offset):
merge_index = range_object_index
break
# Merge the range since it overlaps the existing one.
if (range_offset <= range_object.start_offset and
range_end_offset >= range_object.end_offset):
merge_index = range_object_index
break
range_object_index += 1
# Insert the range after the last one.
if range_object_index >= number_of_range_objects:
insert_index = number_of_range_objects
if insert_index is not None and merge_index is not None:
raise RuntimeError(
u'Unable to insert the range both insert and merge specified.')
if insert_index is not None:
self.ranges.insert(insert_index, Range(range_offset, range_size))
elif merge_index is not None:
range_object = self.ranges[merge_index]
if range_offset < range_object.start_offset:
range_object.size += range_object.start_offset - range_offset
range_object.start_offset = range_offset
if range_end_offset > range_object.end_offset:
range_object.size += range_end_offset - range_object.end_offset
range_object.end_offset = range_end_offset
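# A minimal sketch of the insert and merge behavior, mirroring the range list
# tests further below; the offsets and sizes are purely illustrative.
from plaso.classifier import range_list

ranges = range_list.RangeList()
ranges.Insert(500, 100)
# Overlaps the end of the range 500-600, so the two are merged into 500-650.
ranges.Insert(550, 100)
# Does not overlap, so a second range is inserted.
ranges.Insert(2000, 100)

spanning_range = ranges.GetSpanningRange()
print ranges.number_of_ranges                    # 2
print spanning_range.start_offset, spanning_range.size   # 500 1600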
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the range list."""
import unittest
from plaso.classifier import range_list
class RangeListTest(unittest.TestCase):
"""Class to test the range list."""
def testInsertPositiveRanges(self):
"""Function to test the insert function using positive ranges."""
range_list_object = range_list.RangeList()
# Test non-overlapping range.
range_list_object.Insert(500, 100)
self.assertEquals(range_list_object.number_of_ranges, 1)
range_object = range_list_object.ranges[0]
self.assertEquals(range_object.start_offset, 500)
self.assertEquals(range_object.end_offset, 600)
self.assertEquals(range_object.size, 100)
# Test non-overlapping range.
range_list_object.Insert(2000, 100)
self.assertEquals(range_list_object.number_of_ranges, 2)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 2000)
self.assertEquals(range_object.end_offset, 2100)
self.assertEquals(range_object.size, 100)
# Test range that overlaps with an existing range at the start.
range_list_object.Insert(1950, 100)
self.assertEquals(range_list_object.number_of_ranges, 2)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 1950)
self.assertEquals(range_object.end_offset, 2100)
self.assertEquals(range_object.size, 150)
# Test range that overlaps with an existing range at the end.
range_list_object.Insert(2050, 100)
self.assertEquals(range_list_object.number_of_ranges, 2)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 1950)
self.assertEquals(range_object.end_offset, 2150)
self.assertEquals(range_object.size, 200)
# Test non-overlapping range.
range_list_object.Insert(1000, 100)
self.assertEquals(range_list_object.number_of_ranges, 3)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 1000)
self.assertEquals(range_object.end_offset, 1100)
self.assertEquals(range_object.size, 100)
# Test range that aligns with an existing range at the end.
range_list_object.Insert(1100, 100)
self.assertEquals(range_list_object.number_of_ranges, 3)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 1000)
self.assertEquals(range_object.end_offset, 1200)
self.assertEquals(range_object.size, 200)
# Test range that aligns with an existing range at the start.
range_list_object.Insert(900, 100)
self.assertEquals(range_list_object.number_of_ranges, 3)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 900)
self.assertEquals(range_object.end_offset, 1200)
self.assertEquals(range_object.size, 300)
# Test non-overlapping range.
range_list_object.Insert(0, 100)
self.assertEquals(range_list_object.number_of_ranges, 4)
range_object = range_list_object.ranges[0]
self.assertEquals(range_object.start_offset, 0)
self.assertEquals(range_object.end_offset, 100)
self.assertEquals(range_object.size, 100)
# Test invalid ranges.
with self.assertRaises(ValueError):
range_list_object.Insert(-1, 100)
with self.assertRaises(ValueError):
range_list_object.Insert(3000, -100)
if __name__ == '__main__':
unittest.main()
+744
View File
@@ -0,0 +1,744 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The scan tree classes used by the scan tree-based format scanner."""
import logging
from plaso.classifier import patterns
from plaso.classifier import range_list
class _PatternWeights(object):
"""Class that implements pattern weights."""
def __init__(self):
"""Initializes the pattern weights."""
super(_PatternWeights, self).__init__()
self._offsets_per_weight = {}
self._weight_per_offset = {}
def AddOffset(self, pattern_offset):
"""Adds a pattern offset and sets its weight to 0.
Args:
pattern_offset: the pattern offset to add to the pattern weights.
Raises:
ValueError: if the pattern weights already contains the pattern offset.
"""
if pattern_offset in self._weight_per_offset:
raise ValueError(u'Pattern offset already set.')
self._weight_per_offset[pattern_offset] = 0
def AddWeight(self, pattern_offset, weight):
"""Adds a weight for a specific pattern offset.
Args:
pattern_offset: the pattern offset to add to the pattern weights.
weight: the corresponding weight to add.
Raises:
ValueError: if the pattern weights does not contain the pattern offset.
"""
if pattern_offset not in self._weight_per_offset:
raise ValueError(u'Pattern offset not set.')
self._weight_per_offset[pattern_offset] += weight
if weight not in self._offsets_per_weight:
self._offsets_per_weight[weight] = []
self._offsets_per_weight[weight].append(pattern_offset)
def GetLargestWeight(self):
"""Retrieves the largest weight or 0 if none."""
if self._offsets_per_weight:
return max(self._offsets_per_weight)
return 0
def GetOffsetsForWeight(self, weight):
"""Retrieves the list of offsets for a specific weight."""
return self._offsets_per_weight[weight]
def GetWeightForOffset(self, pattern_offset):
"""Retrieves the weight for a specific pattern offset."""
return self._weight_per_offset[pattern_offset]
def ToDebugString(self):
"""Converts the pattern weights into a debug string."""
header1 = u'Pattern offset\tWeight\n'
entries1 = u''.join([u'{0:d}\t{1:d}\n'.format(
pattern_offset, self._weight_per_offset[pattern_offset])
for pattern_offset in self._weight_per_offset])
header2 = u'Weight\tPattern offset(s)\n'
entries2 = u''.join([u'{0:d}\t{1!s}\n'.format(
weight, self._offsets_per_weight[weight])
for weight in self._offsets_per_weight])
return u''.join([header1, entries1, u'\n', header2, entries2, u'\n'])
def SetWeight(self, pattern_offset, weight):
"""Sets a weight for a specific pattern offset.
Args:
pattern_offset: the pattern offset to set in the pattern weights.
weight: the corresponding weight to set.
Raises:
ValueError: if the pattern weights does not contain the pattern offset.
"""
if pattern_offset not in self._weight_per_offset:
raise ValueError(u'Pattern offset not set.')
self._weight_per_offset[pattern_offset] = weight
if weight not in self._offsets_per_weight:
self._offsets_per_weight[weight] = []
self._offsets_per_weight[weight].append(pattern_offset)
class ScanTree(object):
"""Class that implements a scan tree."""
_COMMON_BYTE_VALUES = frozenset(
'\x00\x01\xff\t\n\r 0123456789'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz')
# The offset must be positive, negative offsets are ignored.
OFFSET_MODE_POSITIVE = 1
# The offset must be negative, positive offsets are ignored.
OFFSET_MODE_NEGATIVE = 2
# The offset must be positive, an error is raised for negative offsets.
OFFSET_MODE_POSITIVE_STRICT = 3
# The offset must be negative, an error is raised for positive offsets.
OFFSET_MODE_NEGATIVE_STRICT = 4
def __init__(
self, specification_store, is_bound,
offset_mode=OFFSET_MODE_POSITIVE_STRICT):
"""Initializes and builds the scan tree.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
is_bound: boolean value to indicate if the signatures are bound
to offsets. A value of None indicates that the value should
be ignored and both bound and unbound patterns are considered
unbound.
offset_mode: optional value to indicate how the signature offsets should
be handled. The default is that the offset must be positive
and an error is raised for negative offsets.
"""
super(ScanTree, self).__init__()
self.largest_length = 0
self.pattern_list = []
self.range_list = range_list.RangeList()
self.root_node = None
self.skip_table = None
# First determine all the patterns from the specification store.
self._BuildPatterns(specification_store, is_bound, offset_mode=offset_mode)
# Next create the scan tree starting with the root node.
ignore_list = []
pattern_table = patterns.PatternTable(
self.pattern_list, ignore_list, is_bound)
if pattern_table.patterns:
self.root_node = self._BuildScanTreeNode(
pattern_table, ignore_list, is_bound)
logging.debug(u'Scan tree:\n{0:s}'.format(
self.root_node.ToDebugString()))
# At the end the skip table is determined to provide for the
# BoyerMooreHorspool skip value.
self.skip_table = pattern_table.GetSkipTable()
logging.debug(u'Skip table:\n{0:s}'.format(
self.skip_table.ToDebugString()))
self.largest_length = pattern_table.largest_pattern_length
def _BuildPatterns(
self, specification_store, is_bound,
offset_mode=OFFSET_MODE_POSITIVE_STRICT):
"""Builds the list of patterns.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
is_bound: boolean value to indicate if the signatures are bound
to offsets. A value of None indicates that the value should
be ignored and both bound and unbound patterns are considered
unbound.
offset_mode: optional value to indicate how the signature offsets should
be handled. The default is that the offset must be positive
and an error is raised for negative offsets.
Raises:
ValueError: if a signature offset invalid according to specified offset
mode or a signature pattern is too small to be useful (< 4).
"""
self.pattern_list = []
for specification in specification_store.specifications:
signature_index = 0
for signature in specification.signatures:
if signature.expression:
signature_offset = signature.offset if is_bound else 0
signature_pattern_length = len(signature.expression)
# Make sure signature offset is numeric.
try:
signature_offset = int(signature_offset)
except (TypeError, ValueError):
signature_offset = 0
if signature_offset < 0:
if offset_mode == self.OFFSET_MODE_POSITIVE:
continue
elif offset_mode == self.OFFSET_MODE_POSITIVE_STRICT:
raise ValueError(u'Signature offset less than 0.')
# The range list does not allow offsets to be negative and thus
# the signature offset is turned into a positive equivalent.
signature_offset *= -1
            # The signature size is subtracted to make sure the spanning
# range will align with the original negative offset values.
signature_offset -= signature_pattern_length
elif signature_offset > 0:
if offset_mode == self.OFFSET_MODE_NEGATIVE:
continue
elif offset_mode == self.OFFSET_MODE_NEGATIVE_STRICT:
raise ValueError(u'Signature offset greater than 0.')
if signature_pattern_length < 4:
raise ValueError(u'Signature pattern smaller than 4.')
pattern = patterns.Pattern(
signature_index, signature, specification)
self.pattern_list.append(pattern)
self.range_list.Insert(signature_offset, signature_pattern_length)
signature_index += 1
def _BuildScanTreeNode(self, pattern_table, ignore_list, is_bound):
"""Builds a scan tree node.
Args:
pattern_table: a pattern table (instance of PatternTable).
ignore_list: a list of pattern offsets to ignore
is_bound: boolean value to indicate if the signatures are bound
to offsets. A value of None indicates that the value should
be ignored and both bound and unbound patterns are considered
unbound.
Raises:
ValueError: if number of byte value patterns value out of bounds.
Returns:
A scan tree node (instance of ScanTreeNode).
"""
# Make a copy of the lists because the function is going to alter them
# and the changes must remain in scope of the function.
pattern_list = list(pattern_table.patterns)
ignore_list = list(ignore_list)
similarity_weights = _PatternWeights()
occurrence_weights = _PatternWeights()
value_weights = _PatternWeights()
for pattern_offset in pattern_table.offsets:
similarity_weights.AddOffset(pattern_offset)
occurrence_weights.AddOffset(pattern_offset)
value_weights.AddOffset(pattern_offset)
byte_values = pattern_table.GetByteValues(pattern_offset)
number_of_byte_values = len(byte_values)
if number_of_byte_values > 1:
occurrence_weights.SetWeight(pattern_offset, number_of_byte_values)
for byte_value in byte_values:
byte_value_patterns = byte_values[byte_value]
byte_value_weight = len(byte_value_patterns.patterns)
if byte_value_weight > 1:
similarity_weights.AddWeight(pattern_offset, byte_value_weight)
        # Compare the byte value itself, not its weight, against the set of
        # common byte values.
        if byte_value not in self._COMMON_BYTE_VALUES:
value_weights.AddWeight(pattern_offset, 1)
logging.debug(u'Pattern table:\n{0:s}'.format(
pattern_table.ToDebugString()))
logging.debug(u'Similarity weights:\n{0:s}'.format(
similarity_weights.ToDebugString()))
logging.debug(u'Occurrence weights:\n{0:s}'.format(
occurrence_weights.ToDebugString()))
logging.debug(u'Value weights:\n{0:s}'.format(
value_weights.ToDebugString()))
pattern_offset = self._GetMostSignificantPatternOffset(
pattern_list, similarity_weights, occurrence_weights, value_weights)
ignore_list.append(pattern_offset)
# For the scan tree negative offsets are adjusted so that
# the smallest pattern offset is 0.
scan_tree_pattern_offset = pattern_offset
if scan_tree_pattern_offset < 0:
scan_tree_pattern_offset -= pattern_table.smallest_pattern_offset
scan_tree_node = ScanTreeNode(scan_tree_pattern_offset)
byte_values = pattern_table.GetByteValues(pattern_offset)
for byte_value in byte_values:
byte_value_patterns = byte_values[byte_value]
logging.debug(u'{0:s}'.format(byte_value_patterns.ToDebugString()))
number_of_byte_value_patterns = len(byte_value_patterns.patterns)
if number_of_byte_value_patterns <= 0:
raise ValueError(
u'Invalid number of byte value patterns value out of bounds.')
elif number_of_byte_value_patterns == 1:
for identifier in byte_value_patterns.patterns:
logging.debug(
u'Adding pattern: {0:s} for byte value: 0x{1:02x}.'.format(
identifier, ord(byte_value)))
scan_tree_node.AddByteValue(
byte_value, byte_value_patterns.patterns[identifier])
else:
pattern_table = patterns.PatternTable(
byte_value_patterns.patterns.itervalues(), ignore_list, is_bound)
scan_sub_node = self._BuildScanTreeNode(
pattern_table, ignore_list, is_bound)
logging.debug(
u'Adding scan node for byte value: 0x{0:02x}\n{1:s}'.format(
ord(byte_value), scan_sub_node.ToDebugString()))
scan_tree_node.AddByteValue(ord(byte_value), scan_sub_node)
for identifier in byte_value_patterns.patterns:
logging.debug(u'Removing pattern: {0:s} from:\n{1:s}'.format(
identifier, self._PatternsToDebugString(pattern_list)))
pattern_list.remove(byte_value_patterns.patterns[identifier])
logging.debug(u'Remaining patterns:\n{0:s}'.format(
self._PatternsToDebugString(pattern_list)))
number_of_patterns = len(pattern_list)
if number_of_patterns == 1:
logging.debug(u'Setting pattern: {0:s} for default value'.format(
pattern_list[0].identifier))
scan_tree_node.SetDefaultValue(pattern_list[0])
elif number_of_patterns > 1:
pattern_table = patterns.PatternTable(pattern_list, ignore_list, is_bound)
scan_sub_node = self._BuildScanTreeNode(
pattern_table, ignore_list, is_bound)
logging.debug(u'Setting scan node for default value:\n{0:s}'.format(
scan_sub_node.ToDebugString()))
scan_tree_node.SetDefaultValue(scan_sub_node)
return scan_tree_node
def _GetMostSignificantPatternOffset(
self, pattern_list, similarity_weights, occurrence_weights,
value_weights):
"""Returns the most significant pattern offset.
Args:
pattern_list: a list of patterns
similarity_weights: the similarity (pattern) weights.
occurrence_weights: the occurrence (pattern) weights.
value_weights: the value (pattern) weights.
Raises:
ValueError: when pattern is an empty list.
Returns:
a pattern offset.
"""
if not pattern_list:
raise ValueError(u'Missing pattern list.')
pattern_offset = None
number_of_patterns = len(pattern_list)
if number_of_patterns == 1:
pattern_offset = self._GetPatternOffsetForValueWeights(
value_weights)
elif number_of_patterns == 2:
pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
occurrence_weights, value_weights)
elif number_of_patterns > 2:
pattern_offset = self._GetPatternOffsetForSimilarityWeights(
similarity_weights, occurrence_weights, value_weights)
logging.debug(u'Largest weight offset: {0:d}'.format(pattern_offset))
return pattern_offset
def _GetPatternOffsetForOccurrenceWeights(
self, occurrence_weights, value_weights):
"""Returns the most significant pattern offset based on the value weights.
Args:
occurrence_weights: the occurrence (pattern) weights.
value_weights: the value (pattern) weights.
Returns:
a pattern offset.
"""
debug_string = ""
pattern_offset = None
largest_weight = occurrence_weights.GetLargestWeight()
logging.debug(u'Largest occurrence weight: {0:d}'.format(largest_weight))
if largest_weight > 0:
occurrence_weight_offsets = occurrence_weights.GetOffsetsForWeight(
largest_weight)
number_of_occurrence_offsets = len(occurrence_weight_offsets)
else:
number_of_occurrence_offsets = 0
if number_of_occurrence_offsets == 0:
pattern_offset = self._GetPatternOffsetForValueWeights(
value_weights)
elif number_of_occurrence_offsets == 1:
pattern_offset = occurrence_weight_offsets[0]
else:
largest_weight = 0
largest_value_weight = 0
for occurrence_offset in occurrence_weight_offsets:
value_weight = value_weights.GetWeightForOffset(
occurrence_offset)
debug_string = (
u'Occurrence offset: {0:d} value weight: {1:d}').format(
occurrence_offset, value_weight)
        if pattern_offset is None or largest_weight < value_weight:
largest_weight = value_weight
pattern_offset = occurrence_offset
debug_string += u' largest value weight: {0:d}'.format(
largest_value_weight)
logging.debug(u'{0:s}'.format(debug_string))
return pattern_offset
def _GetPatternOffsetForSimilarityWeights(
self, similarity_weights, occurrence_weights, value_weights):
"""Returns the most significant pattern offset.
Args:
similarity_weights: the similarity (pattern) weights.
occurrence_weights: the occurrence (pattern) weights.
value_weights: the value (pattern) weights.
Returns:
a pattern offset.
"""
debug_string = ""
pattern_offset = None
largest_weight = similarity_weights.GetLargestWeight()
logging.debug(u'Largest similarity weight: {0:d}'.format(largest_weight))
if largest_weight > 0:
similarity_weight_offsets = similarity_weights.GetOffsetsForWeight(
largest_weight)
number_of_similarity_offsets = len(similarity_weight_offsets)
else:
number_of_similarity_offsets = 0
if number_of_similarity_offsets == 0:
pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
occurrence_weights, value_weights)
elif number_of_similarity_offsets == 1:
pattern_offset = similarity_weight_offsets[0]
else:
largest_weight = 0
largest_value_weight = 0
for similarity_offset in similarity_weight_offsets:
occurrence_weight = occurrence_weights.GetWeightForOffset(
similarity_offset)
debug_string = (
u'Similarity offset: {0:d} occurrence weight: {1:d}').format(
similarity_offset, occurrence_weight)
if largest_weight > 0 and largest_weight == occurrence_weight:
value_weight = value_weights.GetWeightForOffset(
similarity_offset)
debug_string += u' value weight: {0:d}'.format(value_weight)
if largest_value_weight < value_weight:
largest_weight = 0
        if pattern_offset is None or largest_weight < occurrence_weight:
largest_weight = occurrence_weight
pattern_offset = similarity_offset
largest_value_weight = value_weights.GetWeightForOffset(
similarity_offset)
debug_string += u' largest value weight: {0:d}'.format(
largest_value_weight)
logging.debug(u'{0:s}'.format(debug_string))
return pattern_offset
def _GetPatternOffsetForValueWeights(
self, value_weights):
"""Returns the most significant pattern offset based on the value weights.
Args:
value_weights: the value (pattern) weights.
Raises:
      RuntimeError: if no value weight offsets were found.
Returns:
a pattern offset.
"""
largest_weight = value_weights.GetLargestWeight()
logging.debug(u'Largest value weight: {0:d}'.format(largest_weight))
if largest_weight > 0:
value_weight_offsets = value_weights.GetOffsetsForWeight(largest_weight)
number_of_value_offsets = len(value_weight_offsets)
else:
number_of_value_offsets = 0
if number_of_value_offsets == 0:
raise RuntimeError(u'No value weight offsets found.')
return value_weight_offsets[0]
def _PatternsToDebugString(self, pattern_list):
"""Converts the list of patterns into a debug string."""
entries = u', '.join([u'{0:s}'.format(pattern) for pattern in pattern_list])
return u''.join([u'[', entries, u']'])
class ScanTreeNode(object):
"""Class that implements a scan tree node."""
def __init__(self, pattern_offset):
"""Initializes the scan tree node.
Args:
pattern_offset: the offset in the pattern to which the node
applies.
"""
super(ScanTreeNode, self).__init__()
self._byte_values = {}
self.default_value = None
self.parent = None
self.pattern_offset = pattern_offset
def AddByteValue(self, byte_value, scan_object):
"""Adds a byte value.
Args:
byte_value: the corresponding byte value.
scan_object: the scan object, either a scan sub node or a pattern.
Raises:
ValueError: if byte value is out of bounds or if the node already
contains a scan object for the byte value.
"""
if isinstance(byte_value, str):
byte_value = ord(byte_value)
if byte_value < 0 or byte_value > 255:
raise ValueError(u'Invalid byte value, value out of bounds.')
if byte_value in self._byte_values:
raise ValueError(u'Byte value already set.')
if isinstance(scan_object, ScanTreeNode):
scan_object.parent = self
self._byte_values[byte_value] = scan_object
def CompareByteValue(
self, data, data_offset, data_size, total_data_offset,
total_data_size=None):
"""Scans a buffer using the bounded scan tree.
    This function will return partial matches on the data block boundary
    as long as the total data size has not been reached.
Args:
data: a buffer containing raw data.
data_offset: the offset in the raw data in the buffer.
data_size: the size of the raw data in the buffer.
total_data_offset: the offset of the data relative to the start of
the total data scanned.
total_data_size: optional value to indicate the total data size.
The default is None.
Returns:
the resulting scan object which is either a ScanTreeNode or Pattern
or None.
Raises:
RuntimeError: if the data offset, total data offset, total data size
or pattern offset value is out of bounds.
"""
found_match = False
scan_tree_byte_value = 0
if data_offset < 0 or data_offset >= data_size:
raise RuntimeError(u'Invalid data offset, value out of bounds.')
if total_data_size is not None and total_data_size < 0:
raise RuntimeError(u'Invalid total data size, value out of bounds.')
if total_data_offset < 0 or (
total_data_size is not None and total_data_offset >= total_data_size):
raise RuntimeError(u'Invalid total data offset, value out of bounds.')
if (total_data_size is not None and
total_data_offset + data_size >= total_data_size):
match_on_boundary = True
else:
match_on_boundary = False
data_offset += self.pattern_offset
if not match_on_boundary and data_offset >= data_size:
raise RuntimeError(u'Invalid pattern offset value, out of bounds.')
if data_offset < data_size:
data_byte_value = ord(data[data_offset])
for scan_tree_byte_value in self._byte_values:
if data_byte_value == scan_tree_byte_value:
found_match = True
break
if found_match:
scan_object = self._byte_values[scan_tree_byte_value]
      logging.debug(
          u'Scan tree node match at data offset: 0x{0:08x}.'.format(
              data_offset))
else:
scan_object = self.default_value
if not scan_object:
scan_object = self.parent
while scan_object and not scan_object.default_value:
scan_object = scan_object.parent
if scan_object:
scan_object = scan_object.default_value
return scan_object
def SetDefaultValue(self, scan_object):
"""Sets the default (non-match) value.
Args:
scan_object: the scan object, either a scan sub node or a pattern.
Raises:
ValueError: if the default value is already set.
"""
if self.default_value:
raise ValueError(u'Default value already set.')
self.default_value = scan_object
def ToDebugString(self, indentation_level=1):
"""Converts the scan tree node into a debug string."""
indentation = u' ' * indentation_level
header = u'{0:s}pattern offset: {1:d}\n'.format(
indentation, self.pattern_offset)
entries = u''
for byte_value in self._byte_values:
entries += u'{0:s}byte value: 0x{1:02x}\n'.format(indentation, byte_value)
if isinstance(self._byte_values[byte_value], ScanTreeNode):
entries += u'{0:s}scan tree node:\n'.format(indentation)
entries += self._byte_values[byte_value].ToDebugString(
indentation_level + 1)
elif isinstance(self._byte_values[byte_value], patterns.Pattern):
entries += u'{0:s}pattern: {1:s}\n'.format(
indentation, self._byte_values[byte_value].identifier)
default = u'{0:s}default value:\n'.format(indentation)
if isinstance(self.default_value, ScanTreeNode):
default += u'{0:s}scan tree node:\n'.format(indentation)
default += self.default_value.ToDebugString(indentation_level + 1)
elif isinstance(self.default_value, patterns.Pattern):
default += u'{0:s}pattern: {1:s}\n'.format(
indentation, self.default_value.identifier)
return u''.join([header, entries, default, u'\n'])
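As a hedged illustration of the byte-value dispatch above, the sketch below builds a two-level scan tree by hand and walks a buffer through it; the Pattern and Signature constructors follow the plaso.classifier interfaces shown elsewhere in this import, and the buffer contents are made up for the example.
# Hedged usage sketch, not part of the original module: manual construction
# of a minimal scan tree that matches 'regf' on its first two bytes.
from plaso.classifier import patterns
from plaso.classifier import scan_tree
from plaso.classifier import specification

format_regf = specification.Specification('REGF')
format_regf.AddNewSignature('regf', offset=0)
signature_regf = specification.Signature('regf', offset=0)
pattern_regf = patterns.Pattern(0, signature_regf, format_regf)
# The root node dispatches on byte 0 ('r'), the sub node on byte 1 ('e').
root_node = scan_tree.ScanTreeNode(0)
sub_node = scan_tree.ScanTreeNode(1)
sub_node.AddByteValue('e', pattern_regf)
root_node.AddByteValue('r', sub_node)
data = 'regf\x00\x00\x00\x00'
# The first comparison returns the sub node, the second the pattern.
scan_object = root_node.CompareByteValue(data, 0, len(data), 0)
scan_object = scan_object.CompareByteValue(data, 0, len(data), 0)
assert scan_object is pattern_regf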
+74
View File
@@ -0,0 +1,74 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the scan tree classes."""
import unittest
from plaso.classifier import patterns
from plaso.classifier import scan_tree
from plaso.classifier import specification
class ScanTreeNodeTest(unittest.TestCase):
"""Class to test the scan tree node."""
def testAddByteValueWithPattern(self):
"""Function to test the add byte value with pattern function."""
scan_node = scan_tree.ScanTreeNode(0)
format_regf = specification.Specification('REGF')
format_regf.AddNewSignature('regf', offset=0)
format_esedb = specification.Specification('ESEDB')
format_esedb.AddNewSignature('\xef\xcd\xab\x89', offset=4)
signature_esedb = specification.Signature('\xef\xcd\xab\x89', offset=4)
signature_regf = specification.Signature('regf', offset=0)
pattern_regf = patterns.Pattern(0, signature_regf, format_regf)
pattern_esedb = patterns.Pattern(0, signature_esedb, format_esedb)
scan_node.AddByteValue('r', pattern_regf)
scan_node.AddByteValue('\xef', pattern_esedb)
self.assertRaises(
ValueError, scan_node.AddByteValue, 'r', pattern_regf)
self.assertRaises(
ValueError, scan_node.AddByteValue, -1, pattern_regf)
self.assertRaises(
ValueError, scan_node.AddByteValue, 256, pattern_regf)
def testAddByteValueWithScanNode(self):
"""Function to test the add byte value with scan node function."""
scan_node = scan_tree.ScanTreeNode(0)
scan_sub_node_0x41 = scan_tree.ScanTreeNode(1)
scan_sub_node_0x80 = scan_tree.ScanTreeNode(1)
scan_node.AddByteValue(0x41, scan_sub_node_0x41)
scan_node.AddByteValue(0x80, scan_sub_node_0x80)
self.assertRaises(
ValueError, scan_node.AddByteValue, 0x80, scan_sub_node_0x80)
self.assertRaises(
ValueError, scan_node.AddByteValue, -1, scan_sub_node_0x80)
self.assertRaises(
ValueError, scan_node.AddByteValue, 256, scan_sub_node_0x80)
if __name__ == '__main__':
unittest.main()
+749
View File
@@ -0,0 +1,749 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the classes for a scan tree-based format scanner."""
import logging
import os
from plaso.classifier import patterns
from plaso.classifier import range_list
from plaso.classifier import scan_tree
class _ScanMatch(object):
"""Class that implements a scan match."""
def __init__(self, total_data_offset, pattern):
"""Initializes the scan result.
Args:
total_data_offset: the offset of the resulting match relative
to the start of the total data scanned.
pattern: the pattern matched.
"""
super(_ScanMatch, self).__init__()
self.total_data_offset = total_data_offset
self.pattern = pattern
@property
def specification(self):
"""The specification."""
return self.pattern.specification
class _ScanResult(object):
"""Class that implements a scan result."""
def __init__(self, specification):
"""Initializes the scan result.
Args:
      specification: the format specification (instance of Specification).
"""
super(_ScanResult, self).__init__()
self.specification = specification
self.scan_matches = []
@property
def identifier(self):
"""The specification identifier."""
return self.specification.identifier
class ScanState(object):
"""Class that implements a scan state."""
# The state definitions.
_SCAN_STATE_START = 1
_SCAN_STATE_SCANNING = 2
_SCAN_STATE_STOP = 3
def __init__(self, scan_tree_node, total_data_size=None):
"""Initializes the scan state.
Args:
scan_tree_node: the corresponding scan tree node or None.
total_data_size: optional value to indicate the total data size.
The default is None.
"""
super(ScanState, self).__init__()
self._matches = []
self.remaining_data = None
self.remaining_data_size = 0
self.scan_tree_node = scan_tree_node
self.state = self._SCAN_STATE_START
self.total_data_offset = 0
self.total_data_size = total_data_size
def AddMatch(self, total_data_offset, pattern):
"""Adds a result to the state to scanning.
Args:
total_data_offset: the offset of the resulting match relative
to the start total data scanned.
pattern: the pattern matched.
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if (self.state != self._SCAN_STATE_START and
self.state != self._SCAN_STATE_SCANNING):
raise RuntimeError(u'Unsupported scan state.')
self._matches.append(_ScanMatch(total_data_offset, pattern))
def GetMatches(self):
"""Retrieves a list containing the results.
Returns:
A list of scan matches (instances of _ScanMatch).
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if self.state != self._SCAN_STATE_STOP:
raise RuntimeError(u'Unsupported scan state.')
return self._matches
def Reset(self, scan_tree_node):
"""Resets the state to start.
This function will clear the remaining data.
Args:
scan_tree_node: the corresponding scan tree node or None.
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if self.state != self._SCAN_STATE_STOP:
raise RuntimeError(u'Unsupported scan state.')
self.remaining_data = None
self.remaining_data_size = 0
self.scan_tree_node = scan_tree_node
self.state = self._SCAN_STATE_START
def Scanning(self, scan_tree_node, total_data_offset):
"""Sets the state to scanning.
Args:
scan_tree_node: the active scan tree node.
total_data_offset: the offset of the resulting match relative
to the start of the total data scanned.
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if (self.state != self._SCAN_STATE_START and
self.state != self._SCAN_STATE_SCANNING):
raise RuntimeError(u'Unsupported scan state.')
self.scan_tree_node = scan_tree_node
self.state = self._SCAN_STATE_SCANNING
self.total_data_offset = total_data_offset
def Stop(self):
"""Sets the state to stop.
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if (self.state != self._SCAN_STATE_START and
self.state != self._SCAN_STATE_SCANNING):
raise RuntimeError(u'Unsupported scan state.')
self.scan_tree_node = None
self.state = self._SCAN_STATE_STOP
class ScanTreeScannerBase(object):
"""Class that implements a scan tree-based scanner base."""
def __init__(self, specification_store):
"""Initializes the scanner.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
"""
super(ScanTreeScannerBase, self).__init__()
self._scan_tree = None
self._specification_store = specification_store
def _ScanBufferScanState(
self, scan_tree_object, scan_state, data, data_size, total_data_offset,
total_data_size=None):
"""Scans a buffer using the scan tree.
    This function implements a Boyer-Moore-Horspool equivalent approach
in combination with the scan tree.
Args:
scan_tree_object: the scan tree (instance of ScanTree).
scan_state: the scan state (instance of ScanState).
data: a buffer containing raw data.
data_size: the size of the raw data in the buffer.
total_data_offset: the offset of the data relative to the start of
the total data scanned.
total_data_size: optional value to indicate the total data size.
The default is None.
Raises:
      RuntimeError: if the total data offset, total data size or the last
                    pattern offset value is out of bounds.
"""
if total_data_size is not None and total_data_size < 0:
raise RuntimeError(u'Invalid total data size, value out of bounds.')
if total_data_offset < 0 or (
total_data_size is not None and total_data_offset >= total_data_size):
raise RuntimeError(u'Invalid total data offset, value out of bounds.')
data_offset = 0
scan_tree_node = scan_state.scan_tree_node
if scan_state.remaining_data:
      # str.join() should be more efficient than concatenation by +.
data = ''.join([scan_state.remaining_data, data])
data_size += scan_state.remaining_data_size
scan_state.remaining_data = None
scan_state.remaining_data_size = 0
if (total_data_size is not None and
total_data_offset + data_size >= total_data_size):
match_on_boundary = True
else:
match_on_boundary = False
while data_offset < data_size:
if (not match_on_boundary and
data_offset + scan_tree_object.largest_length >= data_size):
break
found_match = False
scan_done = False
while not scan_done:
scan_object = scan_tree_node.CompareByteValue(
data, data_offset, data_size, total_data_offset,
total_data_size=total_data_size)
if isinstance(scan_object, scan_tree.ScanTreeNode):
scan_tree_node = scan_object
else:
scan_done = True
if isinstance(scan_object, patterns.Pattern):
pattern_length = len(scan_object.signature.expression)
data_last_offset = data_offset + pattern_length
        if (scan_object.signature.expression ==
            data[data_offset:data_last_offset]):
if (not scan_object.signature.is_bound or
scan_object.signature.offset == data_offset):
found_match = True
logging.debug(
u'Signature match at data offset: 0x{0:08x}.'.format(
data_offset))
scan_state.AddMatch(total_data_offset + data_offset, scan_object)
if found_match:
skip_value = len(scan_object.signature.expression)
scan_tree_node = scan_tree_object.root_node
else:
last_pattern_offset = (
scan_tree_object.skip_table.skip_pattern_length - 1)
if data_offset + last_pattern_offset >= data_size:
raise RuntimeError(
u'Invalid last pattern offset, value out of bounds.')
skip_value = 0
while last_pattern_offset >= 0 and not skip_value:
last_data_offset = data_offset + last_pattern_offset
byte_value = ord(data[last_data_offset])
skip_value = scan_tree_object.skip_table[byte_value]
last_pattern_offset -= 1
if not skip_value:
skip_value = 1
scan_tree_node = scan_tree_object.root_node
data_offset += skip_value
if not match_on_boundary and data_offset < data_size:
scan_state.remaining_data = data[data_offset:data_size]
scan_state.remaining_data_size = data_size - data_offset
scan_state.Scanning(scan_tree_node, total_data_offset + data_offset)
def _ScanBufferScanStateFinal(self, scan_tree_object, scan_state):
"""Scans the remaining data in the scan state using the scan tree.
Args:
scan_tree_object: the scan tree (instance of ScanTree).
scan_state: the scan state (instance of ScanState).
"""
if scan_state.remaining_data:
data = scan_state.remaining_data
data_size = scan_state.remaining_data_size
scan_state.remaining_data = None
scan_state.remaining_data_size = 0
      # Setting the total data size will make sure boundary matches are
      # returned in this scanning pass.
total_data_size = scan_state.total_data_size
if total_data_size is None:
total_data_size = scan_state.total_data_offset + data_size
self._ScanBufferScanState(
scan_tree_object, scan_state, data, data_size,
scan_state.total_data_offset, total_data_size=total_data_size)
scan_state.Stop()
def GetScanResults(self, scan_state):
"""Retrieves the scan results.
Args:
scan_state: the scan state (instance of ScanState).
    Returns:
A list of scan results (instances of _ScanResult).
"""
scan_results = {}
for scan_match in scan_state.GetMatches():
specification = scan_match.specification
identifier = specification.identifier
logging.debug(
u'Scan match at offset: 0x{0:08x} specification: {1:s}'.format(
scan_match.total_data_offset, identifier))
if identifier not in scan_results:
scan_results[identifier] = _ScanResult(specification)
scan_results[identifier].scan_matches.append(scan_match)
return scan_results.values()
class Scanner(ScanTreeScannerBase):
"""Class that implements a scan tree-based scanner."""
_READ_BUFFER_SIZE = 512
def __init__(self, specification_store):
"""Initializes the scanner.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
"""
super(Scanner, self).__init__(specification_store)
def ScanBuffer(self, scan_state, data, data_size):
"""Scans a buffer.
Args:
scan_state: the scan state (instance of ScanState).
data: a buffer containing raw data.
data_size: the size of the raw data in the buffer.
"""
self._ScanBufferScanState(
self._scan_tree, scan_state, data, data_size,
scan_state.total_data_offset,
total_data_size=scan_state.total_data_size)
def ScanFileObject(self, file_object):
"""Scans a file-like object.
Args:
file_object: a file-like object.
Returns:
      A list of scan results (instances of _ScanResult).
"""
file_offset = 0
if hasattr(file_object, 'get_size'):
file_size = file_object.get_size()
else:
file_object.seek(0, os.SEEK_END)
file_size = file_object.tell()
scan_state = self.StartScan(total_data_size=file_size)
file_object.seek(file_offset, os.SEEK_SET)
while file_offset < file_size:
data = file_object.read(self._READ_BUFFER_SIZE)
data_size = len(data)
if data_size == 0:
break
self._ScanBufferScanState(
self._scan_tree, scan_state, data, data_size, file_offset,
total_data_size=file_size)
file_offset += data_size
self.StopScan(scan_state)
return self.GetScanResults(scan_state)
def StartScan(self, total_data_size=None):
"""Starts a scan.
    The function sets up the scanning-related structures if necessary.
Args:
total_data_size: optional value to indicate the total data size.
The default is None.
Returns:
A scan state (instance of ScanState).
Raises:
RuntimeError: when total data size is invalid.
"""
if total_data_size is not None and total_data_size < 0:
raise RuntimeError(u'Invalid total data size.')
if self._scan_tree is None:
self._scan_tree = scan_tree.ScanTree(
self._specification_store, None)
return ScanState(self._scan_tree.root_node, total_data_size=total_data_size)
def StopScan(self, scan_state):
"""Stops a scan.
Args:
scan_state: the scan state (instance of ScanState).
"""
self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
class OffsetBoundScanner(ScanTreeScannerBase):
"""Class that implements an offset-bound scan tree-based scanner."""
_READ_BUFFER_SIZE = 512
def __init__(self, specification_store):
"""Initializes the scanner.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
"""
super(OffsetBoundScanner, self).__init__(specification_store)
self._footer_scan_tree = None
self._footer_spanning_range = None
self._header_scan_tree = None
self._header_spanning_range = None
def _GetFooterRange(self, total_data_size):
"""Retrieves the read buffer aligned footer range.
Args:
      total_data_size: the total data size.
Returns:
A range (instance of Range).
"""
# The actual footer range is in reverse since the spanning footer range
# is based on positive offsets, where 0 is the end of file.
if self._footer_spanning_range.end_offset < total_data_size:
footer_range_start_offset = (
total_data_size - self._footer_spanning_range.end_offset)
else:
footer_range_start_offset = 0
    # Round the footer range start offset down to a multiple of the read
    # buffer size.
footer_range_start_offset /= self._READ_BUFFER_SIZE
footer_range_start_offset *= self._READ_BUFFER_SIZE
    # Round the footer range size up to a multiple of the read buffer size.
footer_range_size = self._footer_spanning_range.size
remainder = footer_range_size % self._READ_BUFFER_SIZE
footer_range_size /= self._READ_BUFFER_SIZE
if remainder > 0:
footer_range_size += 1
footer_range_size *= self._READ_BUFFER_SIZE
return range_list.Range(footer_range_start_offset, footer_range_size)
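  # Worked example (hedged, not in the original source): with
  # _READ_BUFFER_SIZE = 512, a total data size of 4000 and a footer spanning
  # range with end_offset 22 and size 22, the start offset 4000 - 22 = 3978
  # rounds down to 3584 (7 * 512) and the size 22 rounds up to 512, so the
  # aligned footer range covers offsets 3584 up to 4096.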
def _GetHeaderRange(self):
"""Retrieves the read buffer aligned header range.
Returns:
A range (instance of Range).
"""
    # Round the header range start offset down to a multiple of the read
    # buffer size.
header_range_start_offset = self._header_spanning_range.start_offset
header_range_start_offset /= self._READ_BUFFER_SIZE
header_range_start_offset *= self._READ_BUFFER_SIZE
    # Round the header range size up to a multiple of the read buffer size.
header_range_size = self._header_spanning_range.size
remainder = header_range_size % self._READ_BUFFER_SIZE
header_range_size /= self._READ_BUFFER_SIZE
if remainder > 0:
header_range_size += 1
header_range_size *= self._READ_BUFFER_SIZE
return range_list.Range(header_range_start_offset, header_range_size)
def _ScanBufferScanState(
self, scan_tree_object, scan_state, data, data_size, total_data_offset,
total_data_size=None):
"""Scans a buffer using the scan tree.
    This function implements a Boyer-Moore-Horspool equivalent approach
in combination with the scan tree.
Args:
scan_tree_object: the scan tree (instance of ScanTree).
scan_state: the scan state (instance of ScanState).
data: a buffer containing raw data.
data_size: the size of the raw data in the buffer.
total_data_offset: the offset of the data relative to the start of
the total data scanned.
total_data_size: optional value to indicate the total data size.
The default is None.
"""
scan_done = False
scan_tree_node = scan_tree_object.root_node
while not scan_done:
data_offset = 0
scan_object = scan_tree_node.CompareByteValue(
data, data_offset, data_size, total_data_offset,
total_data_size=total_data_size)
if isinstance(scan_object, scan_tree.ScanTreeNode):
scan_tree_node = scan_object
else:
scan_done = True
if isinstance(scan_object, patterns.Pattern):
pattern_length = len(scan_object.signature.expression)
pattern_start_offset = scan_object.signature.offset
pattern_end_offset = pattern_start_offset + pattern_length
      if (scan_object.signature.expression ==
          data[pattern_start_offset:pattern_end_offset]):
scan_state.AddMatch(
total_data_offset + scan_object.signature.offset, scan_object)
logging.debug(
u'Signature match at data offset: 0x{0:08x}.'.format(data_offset))
# TODO: implement.
# def ScanBuffer(self, scan_state, data, data_size):
# """Scans a buffer.
# Args:
# scan_state: the scan state (instance of ScanState).
# data: a buffer containing raw data.
# data_size: the size of the raw data in the buffer.
# """
# # TODO: fix footer scanning logic.
# # need to know the file size here for the footers.
# # TODO: check for clashing ranges?
# header_range = self._GetHeaderRange()
# footer_range = self._GetFooterRange(scan_state.total_data_size)
# if self._scan_tree == self._header_scan_tree:
# if (scan_state.total_data_offset >= header_range.start_offset and
# scan_state.total_data_offset < header_range.end_offset):
# self._ScanBufferScanState(
# self._scan_tree, scan_state, data, data_size,
# scan_state.total_data_offset,
# total_data_size=scan_state.total_data_size)
# elif scan_state.total_data_offset > header_range.end_offset:
# # TODO: implement.
# pass
# if self._scan_tree == self._footer_scan_tree:
# if (scan_state.total_data_offset >= footer_range.start_offset and
# scan_state.total_data_offset < footer_range.end_offset):
# self._ScanBufferScanState(
# self._scan_tree, scan_state, data, data_size,
# scan_state.total_data_offset,
# total_data_size=scan_state.total_data_size)
def ScanFileObject(self, file_object):
"""Scans a file-like object.
Args:
file_object: a file-like object.
Returns:
      A list of scan results (instances of _ScanResult).
"""
# TODO: add support for fixed size block-based reads.
if hasattr(file_object, 'get_size'):
file_size = file_object.get_size()
else:
file_object.seek(0, os.SEEK_END)
file_size = file_object.tell()
file_offset = 0
scan_state = self.StartScan(total_data_size=file_size)
if self._header_scan_tree.root_node is not None:
header_range = self._GetHeaderRange()
# TODO: optimize the read by supporting fixed size block-based reads.
# if file_offset < header_range.start_offset:
# file_offset = header_range.start_offset
file_object.seek(file_offset, os.SEEK_SET)
# TODO: optimize the read by supporting fixed size block-based reads.
# data = file_object.read(header_range.size)
data = file_object.read(header_range.end_offset)
data_size = len(data)
if data_size > 0:
self._ScanBufferScanState(
self._scan_tree, scan_state, data, data_size, file_offset,
total_data_size=file_size)
file_offset += data_size
if self._footer_scan_tree.root_node is not None:
self.StopScan(scan_state)
self._scan_tree = self._footer_scan_tree
scan_state.Reset(self._scan_tree.root_node)
if self._footer_scan_tree.root_node is not None:
footer_range = self._GetFooterRange(file_size)
      # Note that the offsets in the footer scan tree start at 0. Make sure
# the data offset of the data being scanned is aligned with the offset
# in the scan tree.
if footer_range.start_offset < self._footer_spanning_range.end_offset:
data_offset = (
self._footer_spanning_range.end_offset - footer_range.start_offset)
else:
data_offset = 0
if file_offset < footer_range.start_offset:
file_offset = footer_range.start_offset
file_object.seek(file_offset, os.SEEK_SET)
data = file_object.read(self._READ_BUFFER_SIZE)
data_size = len(data)
if data_size > 0:
self._ScanBufferScanState(
self._scan_tree, scan_state, data[data_offset:],
data_size - data_offset, file_offset + data_offset,
total_data_size=file_size)
self.StopScan(scan_state)
return self.GetScanResults(scan_state)
def StartScan(self, total_data_size=None):
"""Starts a scan.
    The function sets up the scanning-related structures if necessary.
Args:
total_data_size: optional value to indicate the total data size.
The default is None.
Returns:
      A scan state (instance of ScanState).
Raises:
RuntimeError: when total data size is invalid.
"""
if total_data_size is None or total_data_size < 0:
raise RuntimeError(u'Invalid total data size.')
if self._header_scan_tree is None:
self._header_scan_tree = scan_tree.ScanTree(
self._specification_store, True,
offset_mode=scan_tree.ScanTree.OFFSET_MODE_POSITIVE)
if self._header_spanning_range is None:
spanning_range = self._header_scan_tree.range_list.GetSpanningRange()
self._header_spanning_range = spanning_range
if self._footer_scan_tree is None:
self._footer_scan_tree = scan_tree.ScanTree(
self._specification_store, True,
offset_mode=scan_tree.ScanTree.OFFSET_MODE_NEGATIVE)
if self._footer_spanning_range is None:
spanning_range = self._footer_scan_tree.range_list.GetSpanningRange()
self._footer_spanning_range = spanning_range
if self._header_scan_tree.root_node is not None:
self._scan_tree = self._header_scan_tree
elif self._footer_scan_tree.root_node is not None:
self._scan_tree = self._footer_scan_tree
else:
self._scan_tree = None
if self._scan_tree is not None:
root_node = self._scan_tree.root_node
else:
root_node = None
return ScanState(root_node, total_data_size=total_data_size)
def StopScan(self, scan_state):
"""Stops a scan.
Args:
scan_state: the scan state (instance of ScanState).
"""
self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
self._scan_tree = None
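The inner loop of _ScanBufferScanState above combines the scan tree with a Boyer-Moore-Horspool style skip table. A hedged, self-contained sketch of that skip idea, reduced to a single pattern (the scan tree version generalizes the table across all patterns):
# Hedged sketch, not part of the original module: Boyer-Moore-Horspool
# style skipping for a single byte string pattern.
def HorspoolFind(data, pattern):
  """Returns the offset of pattern in data or -1 (illustrative only)."""
  pattern_length = len(pattern)
  # Bytes occurring in the pattern (except its last position) allow a
  # shorter skip; all other bytes allow skipping the full pattern length.
  skip_table = dict(
      (pattern[index], pattern_length - index - 1)
      for index in range(pattern_length - 1))
  data_offset = 0
  while data_offset + pattern_length <= len(data):
    if data[data_offset:data_offset + pattern_length] == pattern:
      return data_offset
    last_byte = data[data_offset + pattern_length - 1]
    data_offset += skip_table.get(last_byte, pattern_length)
  return -1

assert HorspoolFind('\x00\x00regf\x00', 'regf') == 2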
+119
View File
@@ -0,0 +1,119 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the format scanner classes."""
import unittest
from plaso.classifier import scanner
from plaso.classifier import test_lib
class ScannerTest(unittest.TestCase):
"""Class to test the scanner."""
def testInitialize(self):
"""Function to test the initialize function."""
store = test_lib.CreateSpecificationStore()
# Signature for LNK
data1 = ('\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
'\x00\x00\x00\x46')
# Signature for REGF
data2 = 'regf'
# Random data
data3 = '\x01\xfa\xe0\xbe\x99\x8e\xdb\x70\xea\xcc\x6b\xae\x2f\xf5\xa2\xe4'
# Boundary scan test
data4a = ('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00PK')
data4b = ('\x07\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Z')
# Large buffer test
data5_size = 1024 * 1024
data5 = '\x00' * (data5_size - 4)
data5 += 'PK\x07\x08'
test_scanner = scanner.Scanner(store)
total_data_size = len(data1)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data1, len(data1))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
scan_state = test_scanner.StartScan(total_data_size=None)
test_scanner.ScanBuffer(scan_state, data1, len(data1))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
total_data_size = len(data2)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data2, len(data2))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
scan_state = test_scanner.StartScan(total_data_size=None)
test_scanner.ScanBuffer(scan_state, data2, len(data2))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
total_data_size = len(data3)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data3, len(data3))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 0)
scan_state = test_scanner.StartScan(total_data_size=None)
test_scanner.ScanBuffer(scan_state, data3, len(data3))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 0)
total_data_size = len(data4a) + len(data4b)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data4a, len(data4a))
test_scanner.ScanBuffer(scan_state, data4b, len(data4b))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
scan_state = test_scanner.StartScan(total_data_size=None)
test_scanner.ScanBuffer(scan_state, data4a, len(data4a))
test_scanner.ScanBuffer(scan_state, data4b, len(data4b))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
total_data_size = len(data5)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data5, len(data5))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
if __name__ == '__main__':
unittest.main()
+156
View File
@@ -0,0 +1,156 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The format specification classes."""
class Signature(object):
"""Class that defines a signature of a format specification.
The signature consists of a byte string expression, an optional
  offset relative to the start of the data, and a value to indicate
if the expression is bound to the offset.
"""
def __init__(self, expression, offset=None, is_bound=False):
"""Initializes the signature.
Args:
      expression: string containing the expression of the signature.
                  The expression consists of a byte string; at the moment
                  regular expressions (regexp) are not supported.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative to the start of the data; a negative offset
              is relative to the end of the data.
is_bound: boolean value to indicate the signature must be bound to
the offset or False by default.
"""
self.expression = expression
self.offset = offset
self.is_bound = is_bound
class Specification(object):
"""Class that contains a format specification."""
def __init__(self, identifier):
"""Initializes the specification.
Args:
identifier: string containing a unique name for the format.
"""
self.identifier = identifier
self.mime_types = []
self.signatures = []
self.universal_type_identifiers = []
def AddMimeType(self, mime_type):
"""Adds a MIME type."""
self.mime_types.append(mime_type)
def AddNewSignature(self, expression, offset=None, is_bound=False):
"""Adds a signature.
Args:
expression: string containing the expression of the signature.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative to the start of the data; a negative offset
              is relative to the end of the data.
is_bound: boolean value to indicate the signature must be bound to
the offset or False by default.
"""
self.signatures.append(
Signature(expression, offset=offset, is_bound=is_bound))
  def AddUniversalTypeIdentifier(self, universal_type_identifier):
    """Adds a Universal Type Identifier (UTI)."""
    self.universal_type_identifiers.append(universal_type_identifier)
class SpecificationStore(object):
"""Class that servers as a store for specifications."""
def __init__(self):
"""Initializes the specification store."""
self._format_specifications = {}
@property
def specifications(self):
"""A specifications iterator object."""
return self._format_specifications.itervalues()
def AddNewSpecification(self, identifier):
"""Adds a new specification.
Args:
identifier: a string containing the format identifier,
which should be unique for the store.
Returns:
      an instance of Specification.
Raises:
ValueError: if the store already contains a specification with
the same identifier.
"""
if identifier in self._format_specifications:
raise ValueError("specification {0:s} is already defined in "
"store.".format(identifier))
self._format_specifications[identifier] = Specification(identifier)
return self._format_specifications[identifier]
def AddSpecification(self, specification):
"""Adds a specification.
Args:
specification: the specification (instance of Specification).
Raises:
KeyError: if the store already contains a specification with
the same identifier.
"""
if specification.identifier in self._format_specifications:
raise KeyError(
u'Specification {0:s} is already defined in store.'.format(
specification.identifier))
self._format_specifications[specification.identifier] = specification
def ReadFromFileObject(self, unused_file_object):
"""Reads the specification store from a file-like object.
Args:
unused_file_object: A file-like object.
Raises:
RuntimeError: because functionality is not implemented yet.
"""
# TODO: implement this function.
raise RuntimeError(u'Function not implemented.')
def ReadFromFile(self, filename):
"""Reads the specification store from a file.
Args:
filename: The name of the file.
"""
    with open(filename, 'r') as file_object:
      self.ReadFromFileObject(file_object)
+46
View File
@@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the format specification classes."""
import unittest
from plaso.classifier import specification
class SpecificationStoreTest(unittest.TestCase):
"""Class to test the specification store."""
def testAddSpecification(self):
"""Function to test the add specification function."""
store = specification.SpecificationStore()
format_regf = specification.Specification('REGF')
format_regf.AddNewSignature('regf', offset=0)
format_esedb = specification.Specification('ESEDB')
format_esedb.AddNewSignature('\xef\xcd\xab\x89', offset=4)
store.AddSpecification(format_regf)
store.AddSpecification(format_esedb)
with self.assertRaises(KeyError):
store.AddSpecification(format_regf)
if __name__ == '__main__':
unittest.main()
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared test cases."""
from plaso.classifier import specification
def CreateSpecificationStore():
"""Creates a format specification store for testing purposes.
Returns:
A format specification store (instance of SpecificationStore).
"""
store = specification.SpecificationStore()
test_specification = store.AddNewSpecification('7zip')
test_specification.AddMimeType('application/x-7z-compressed')
test_specification.AddUniversalTypeIdentifier('org.7-zip.7-zip-archive')
test_specification.AddNewSignature('7z\xbc\xaf\x27\x1c', offset=0)
test_specification = store.AddNewSpecification('esedb')
test_specification.AddNewSignature(
'\xef\xcd\xab\x89', offset=4, is_bound=True)
test_specification = store.AddNewSpecification('evt')
test_specification.AddNewSignature(
'\x30\x00\x00\x00LfLe\x01\x00\x00\x00\x01\x00\x00\x00', offset=0,
is_bound=True)
test_specification = store.AddNewSpecification('evtx')
test_specification.AddNewSignature('ElfFile\x00', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('ewf')
test_specification.AddNewSignature(
'EVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)
  test_specification = store.AddNewSpecification('ewf_logical')
test_specification.AddNewSignature(
'LVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('lnk')
test_specification.AddNewSignature(
'\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
'\x00\x00\x00\x46', offset=0)
test_specification = store.AddNewSpecification('msiecf_index_dat')
test_specification.AddNewSignature(
'Client UrlCache MMF Ver ', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('nk2')
test_specification.AddNewSignature(
'\x0d\xf0\xad\xba\xa0\x00\x00\x00\x01\x00\x00\x00', offset=0,
is_bound=True)
test_specification = store.AddNewSpecification('olecf')
test_specification.AddNewSignature(
'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1', offset=0, is_bound=True)
test_specification.AddNewSignature(
'\x0e\x11\xfc\x0d\xd0\xcf\x11\x0e', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('pff')
test_specification.AddNewSignature('!BDN', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('qcow')
test_specification.AddNewSignature('QFI\xfb', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('rar')
test_specification.AddMimeType('application/x-rar-compressed')
test_specification.AddUniversalTypeIdentifier('com.rarlab.rar-archive')
test_specification.AddNewSignature(
'Rar!\x1a\x07\x00', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('regf')
test_specification.AddNewSignature('regf', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('thumbache_db_cache')
test_specification.AddNewSignature('CMMM', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('thumbache_db_index')
test_specification.AddNewSignature('IMMM', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('zip')
test_specification.AddMimeType('application/zip')
test_specification.AddUniversalTypeIdentifier('com.pkware.zip-archive')
# WinZip 8 signature.
test_specification.AddNewSignature('PK00', offset=0, is_bound=True)
test_specification.AddNewSignature('PK\x01\x02')
test_specification.AddNewSignature('PK\x03\x04', offset=0)
test_specification.AddNewSignature('PK\x05\x05')
# Will be at offset 0 when the archive is empty.
test_specification.AddNewSignature('PK\x05\x06', offset=-22, is_bound=True)
test_specification.AddNewSignature('PK\x06\x06')
test_specification.AddNewSignature('PK\x06\x07')
test_specification.AddNewSignature('PK\x06\x08')
# Will be at offset 0 when this is spanned archive.
test_specification.AddNewSignature('PK\x07\x08')
return store
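The -22 offset on the bound PK\x05\x06 signature above comes from the fixed 22-byte size of the ZIP end-of-central-directory record; in an empty archive that record is the entire file. A hedged check, not part of the original module, using only the standard library:
# Hedged check: an empty ZIP archive is exactly the 22-byte
# end-of-central-directory record, which starts with PK\x05\x06.
import io
import zipfile

buffer_object = io.BytesIO()
zipfile.ZipFile(buffer_object, 'w').close()
data = buffer_object.getvalue()
assert len(data) == 22
assert data[0:4] == b'PK\x05\x06'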
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+202
View File
@@ -0,0 +1,202 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The file format classifier."""
# TODO: rewrite most of the classifier in C and integrate with the code in:
# plaso/classifier
import gzip
import logging
import os
import tarfile
import zipfile
import zlib
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.lib import errors
class Classifier(object):
"""Class that defines the file format classifier."""
_MAGIC_VALUES = {
'ZIP': {'length': 4, 'offset': 0, 'values': ['P', 'K', '\x03', '\x04']},
'TAR': {'length': 5, 'offset': 257, 'values': ['u', 's', 't', 'a', 'r']},
'GZ': {'length': 2, 'offset': 0, 'values': ['\x1f', '\x8b']},
}
  # TODO: Remove this logic when the classifier is ready.
  # This is only used temporarily until files can be classified.
magic_max_length = 0
# Defines the maximum depth into a file (for SmartOpenFiles).
MAX_FILE_DEPTH = 3
@classmethod
def _SmartOpenFile(cls, file_entry):
"""Return a generator for all pathspec protobufs extracted from a file.
If the file is compressed then extract all members and include
them into the processing queue.
Args:
file_entry: The file entry object.
Yields:
A path specification (instance of dfvfs.PathSpec) of embedded file
entries.
"""
file_object = file_entry.GetFileObject()
    # TODO: Remove when the classifier gets deployed. Then we call the
    # classifier here and use that for the definition (and then we forward
    # the classifier definition in the path specification).
file_object.seek(0, os.SEEK_SET)
if not cls.magic_max_length:
for magic_value in cls._MAGIC_VALUES.values():
cls.magic_max_length = max(
cls.magic_max_length,
magic_value['length'] + magic_value['offset'])
header = file_object.read(cls.magic_max_length)
file_classification = ''
    # Go over each and every magic value defined and compare
    # each read byte (according to the original offset and the current one).
    # If all match, then we have a particular file format and we can
    # move on.
for m_value, m_dict in cls._MAGIC_VALUES.items():
length = m_dict['length'] + m_dict['offset']
if len(header) < length:
continue
offset = m_dict['offset']
magic = m_dict['values']
if header[offset:offset + len(magic)] == ''.join(magic):
file_classification = m_value
break
# TODO: refactor the file type specific code into sub functions.
if file_classification == 'ZIP':
try:
file_object.seek(0, os.SEEK_SET)
zip_file = zipfile.ZipFile(file_object, 'r')
        # TODO: Make this a more "sane" check, and perhaps not entirely
        # skip the file if it has this particular ending, but for now this
        # both slows the tool down considerably and makes it more unstable.
_, _, filename_extension = file_entry.name.rpartition(u'.')
        if filename_extension in [u'jar', u'sym', u'xpi']:
file_object.close()
logging.debug(
u'Unsupported ZIP sub type: {0:s} detected in file: {1:s}'.format(
filename_extension, file_entry.path_spec.comparable))
return
for info in zip_file.infolist():
if info.file_size > 0:
logging.debug(
u'Including: {0:s} from ZIP into process queue.'.format(
info.filename))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_ZIP, location=info.filename,
parent=file_entry.path_spec)
except zipfile.BadZipfile:
pass
elif file_classification == 'GZ':
try:
type_indicator = file_entry.path_spec.type_indicator
if type_indicator == definitions.TYPE_INDICATOR_GZIP:
raise errors.SameFileType
file_object.seek(0, os.SEEK_SET)
gzip_file = gzip.GzipFile(fileobj=file_object, mode='rb')
_ = gzip_file.read(4)
gzip_file.close()
logging.debug((
u'Including: {0:s} as GZIP compressed stream into process '
u'queue.').format(file_entry.name))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_GZIP, parent=file_entry.path_spec)
except (IOError, zlib.error, errors.SameFileType):
pass
# TODO: Add BZ2 support.
elif file_classification == 'TAR':
try:
file_object.seek(0, os.SEEK_SET)
tar_file = tarfile.open(fileobj=file_object, mode='r')
for name_info in tar_file.getmembers():
if not name_info.isfile():
continue
name = name_info.path
logging.debug(
u'Including: {0:s} from TAR into process queue.'.format(name))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TAR, location=name,
parent=file_entry.path_spec)
except tarfile.ReadError:
pass
file_object.close()
@classmethod
def SmartOpenFiles(cls, file_entry, depth=0):
"""Generate a list of all available PathSpecs extracted from a file.
Args:
file_entry: A file entry object.
      depth: Incrementing number that defines the current depth into
             a file (a file inside a ZIP file is depth 1, a file inside
             a tar.gz would be depth 2).
Yields:
A file entry object (instance of dfvfs.FileEntry).
"""
if depth >= cls.MAX_FILE_DEPTH:
return
for path_spec in cls._SmartOpenFile(file_entry):
sub_file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
if sub_file_entry is None:
logging.debug(
u'Unable to open file: {0:s}'.format(path_spec.comparable))
continue
yield sub_file_entry
depth += 1
for sub_file_entry in cls.SmartOpenFiles(sub_file_entry, depth=depth):
yield sub_file_entry
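The magic value comparison in _SmartOpenFile above reduces to a header prefix check at a fixed offset. A hedged standalone sketch of just that step, not part of the original module, reusing the same ZIP, TAR and GZ values:
# Hedged sketch: the magic value comparison used by
# Classifier._SmartOpenFile, in isolation.
_MAGIC_VALUES = {
    'ZIP': {'length': 4, 'offset': 0, 'values': ['P', 'K', '\x03', '\x04']},
    'TAR': {'length': 5, 'offset': 257, 'values': ['u', 's', 't', 'a', 'r']},
    'GZ': {'length': 2, 'offset': 0, 'values': ['\x1f', '\x8b']},
}

def ClassifyHeader(header):
  """Returns the file classification of the header or an empty string."""
  for classification, magic_dict in _MAGIC_VALUES.items():
    offset = magic_dict['offset']
    magic = ''.join(magic_dict['values'])
    if len(header) < offset + len(magic):
      continue
    if header[offset:offset + len(magic)] == magic:
      return classification
  return ''

assert ClassifyHeader('PK\x03\x04rest of file') == 'ZIP'
assert ClassifyHeader('\x1f\x8bcompressed data') == 'GZ'
assert ClassifyHeader('no known magic here') == ''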
+421
View File
@@ -0,0 +1,421 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generic collector that supports both file system and image files."""
import hashlib
import logging
import os
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.lib import errors as dfvfs_errors
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.engine import queue
from plaso.lib import errors
class Collector(queue.ItemQueueProducer):
"""Class that implements a collector object."""
def __init__(
self, process_queue, source_path, source_path_spec,
resolver_context=None):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
source_path: Path of the source file or directory.
      source_path_spec: The source path specification (instance of
                        dfvfs.PathSpec) as determined by the file system
                        scanner.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None.
"""
super(Collector, self).__init__(process_queue)
self._filter_find_specs = None
self._fs_collector = FileSystemCollector(process_queue)
self._resolver_context = resolver_context
# TODO: remove the need to pass source_path
self._source_path = os.path.abspath(source_path)
self._source_path_spec = source_path_spec
self._vss_stores = None
def __enter__(self):
"""Enters a with statement."""
return self
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Exits a with statement."""
return
def _ProcessImage(self, volume_path_spec, find_specs=None):
"""Processes a volume within a storage media image.
Args:
volume_path_spec: The path specification of the volume containing
the file system.
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
if find_specs:
logging.debug(u'Collecting from image file: {0:s} with filter'.format(
self._source_path))
else:
logging.debug(u'Collecting from image file: {0:s}'.format(
self._source_path))
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
try:
file_system = path_spec_resolver.Resolver.OpenFileSystem(
path_spec, resolver_context=self._resolver_context)
except IOError as exception:
logging.error(
u'Unable to open file system with error: {0:s}'.format(exception))
return
try:
self._fs_collector.Collect(
file_system, path_spec, find_specs=find_specs)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
if find_specs:
logging.debug(u'Collection from image with filter FAILED.')
else:
logging.debug(u'Collection from image FAILED.')
return
if self._abort:
return
if self._vss_stores:
self._ProcessVSS(volume_path_spec, find_specs=find_specs)
if find_specs:
logging.debug(u'Collection from image with filter COMPLETED.')
else:
logging.debug(u'Collection from image COMPLETED.')
def _ProcessVSS(self, volume_path_spec, find_specs=None):
"""Processes a VSS volume within a storage media image.
Args:
volume_path_spec: The path specification of the volume containing
the file system.
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
logging.info(u'Processing VSS.')
vss_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location=u'/',
parent=volume_path_spec)
vss_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
vss_path_spec, resolver_context=self._resolver_context)
number_of_vss = vss_file_entry.number_of_sub_file_entries
    # In plaso, 1 represents the first store index; in dfvfs and pyvshadow,
    # 0 represents the first store index, so 1 is subtracted.
vss_store_range = [store_nr - 1 for store_nr in self._vss_stores]
for store_index in vss_store_range:
if self._abort:
return
if find_specs:
logging.info((
u'Collecting from VSS volume: {0:d} out of: {1:d} '
u'with filter').format(store_index + 1, number_of_vss))
else:
logging.info(u'Collecting from VSS volume: {0:d} out of: {1:d}'.format(
store_index + 1, number_of_vss))
vss_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_VSHADOW, store_index=store_index,
parent=volume_path_spec)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=vss_path_spec)
file_system = path_spec_resolver.Resolver.OpenFileSystem(
path_spec, resolver_context=self._resolver_context)
try:
self._fs_collector.Collect(
file_system, path_spec, find_specs=find_specs)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
if find_specs:
logging.debug(
u'Collection from VSS store: {0:d} with filter FAILED.'.format(
store_index + 1))
else:
logging.debug(u'Collection from VSS store: {0:d} FAILED.'.format(
store_index + 1))
return
if find_specs:
logging.debug(
u'Collection from VSS store: {0:d} with filter COMPLETED.'.format(
store_index + 1))
else:
logging.debug(u'Collection from VSS store: {0:d} COMPLETED.'.format(
store_index + 1))
def Collect(self):
"""Collects files from the source."""
source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
self._source_path_spec, resolver_context=self._resolver_context)
if not source_file_entry:
logging.warning(u'No files to collect.')
self.SignalEndOfInput()
return
if (not source_file_entry.IsDirectory() and
not source_file_entry.IsFile() and
not source_file_entry.IsDevice()):
raise errors.CollectorError(
u'Source path: {0:s} not a device, file or directory.'.format(
self._source_path))
type_indicator = self._source_path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
if source_file_entry.IsFile():
self.ProduceItem(self._source_path_spec)
else:
file_system = path_spec_resolver.Resolver.OpenFileSystem(
self._source_path_spec, resolver_context=self._resolver_context)
try:
self._fs_collector.Collect(
file_system, self._source_path_spec,
find_specs=self._filter_find_specs)
except (dfvfs_errors.AccessError,
dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
else:
self._ProcessImage(
self._source_path_spec.parent, find_specs=self._filter_find_specs)
self.SignalEndOfInput()
def SetCollectDirectoryMetadata(self, collect_directory_metadata):
"""Sets the collect directory metadata flag.
Args:
collect_directory_metadata: Boolean value to indicate to collect
directory metadata.
"""
self._fs_collector.SetCollectDirectoryMetadata(collect_directory_metadata)
def SetFilter(self, filter_find_specs):
"""Sets the collection filter find specifications.
Args:
filter_find_specs: List of filter find specifications (instances of
dfvfs.FindSpec).
"""
self._filter_find_specs = filter_find_specs
def SetVssInformation(self, vss_stores):
"""Sets the Volume Shadow Snapshots (VSS) information.
This function will enable VSS collection.
Args:
vss_stores: The range of VSS stores to include in the collection,
where 1 represents the first store.
"""
self._vss_stores = vss_stores
def SignalAbort(self):
"""Signals the producer to abort."""
super(Collector, self).SignalAbort()
self._fs_collector.SignalAbort()
class FileSystemCollector(queue.ItemQueueProducer):
"""Class that implements a file system collector object."""
def __init__(self, process_queue):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
"""
super(FileSystemCollector, self).__init__(process_queue)
self._collect_directory_metadata = True
self._duplicate_file_check = False
self._hashlist = {}
self.number_of_file_entries = 0
def __enter__(self):
"""Enters a with statement."""
return self
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Exits a with statement."""
return
def _CalculateNTFSTimeHash(self, file_entry):
"""Return a hash value calculated from a NTFS file's metadata.
Args:
file_entry: The file entry (instance of TSKFileEntry).
Returns:
A hash value (string) that can be used to determine if a file's timestamp
value has changed.
"""
stat_object = file_entry.GetStat()
ret_hash = hashlib.md5()
ret_hash.update('atime:{0:d}.{1:d}'.format(
getattr(stat_object, 'atime', 0),
getattr(stat_object, 'atime_nano', 0)))
ret_hash.update('crtime:{0:d}.{1:d}'.format(
getattr(stat_object, 'crtime', 0),
getattr(stat_object, 'crtime_nano', 0)))
ret_hash.update('mtime:{0:d}.{1:d}'.format(
getattr(stat_object, 'mtime', 0),
getattr(stat_object, 'mtime_nano', 0)))
ret_hash.update('ctime:{0:d}.{1:d}'.format(
getattr(stat_object, 'ctime', 0),
getattr(stat_object, 'ctime_nano', 0)))
return ret_hash.hexdigest()
def _ProcessDirectory(self, file_entry):
"""Processes a directory and extract its metadata if necessary."""
# Need to do a breadth-first search otherwise we'll hit the Python
# maximum recursion depth.
sub_directories = []
for sub_file_entry in file_entry.sub_file_entries:
if self._abort:
return
try:
if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink():
continue
except dfvfs_errors.BackEndError as exception:
logging.warning(
u'Unable to process file: {0:s} with error: {1:s}'.format(
sub_file_entry.path_spec.comparable.replace(
u'\n', u';'), exception))
continue
# For TSK-based file entries only, ignore the virtual /$OrphanFiles
# directory.
if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK:
if file_entry.IsRoot() and sub_file_entry.name == u'$OrphanFiles':
continue
if sub_file_entry.IsDirectory():
# This check is here to improve performance by not producing
# path specifications that don't get processed.
if self._collect_directory_metadata:
self.ProduceItem(sub_file_entry.path_spec)
self.number_of_file_entries += 1
sub_directories.append(sub_file_entry)
elif sub_file_entry.IsFile():
# When processing a VSS, calculate a hash value based on the available
# timestamps, compare it against previously calculated hash values, and
# only add the file to the queue if the hash does not match.
if self._duplicate_file_check:
hash_value = self._CalculateNTFSTimeHash(sub_file_entry)
inode = getattr(sub_file_entry.path_spec, 'inode', 0)
if inode in self._hashlist:
if hash_value in self._hashlist[inode]:
continue
self._hashlist.setdefault(inode, []).append(hash_value)
self.ProduceItem(sub_file_entry.path_spec)
self.number_of_file_entries += 1
for sub_file_entry in sub_directories:
if self._abort:
return
try:
self._ProcessDirectory(sub_file_entry)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
def Collect(self, file_system, path_spec, find_specs=None):
"""Collects files from the file system.
Args:
file_system: The file system (instance of dfvfs.FileSystem).
path_spec: The path specification (instance of dfvfs.PathSpec).
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
if find_specs:
searcher = file_system_searcher.FileSystemSearcher(file_system, path_spec)
for path_spec in searcher.Find(find_specs=find_specs):
if self._abort:
return
self.ProduceItem(path_spec)
self.number_of_file_entries += 1
else:
file_entry = file_system.GetFileEntryByPathSpec(path_spec)
self._ProcessDirectory(file_entry)
def SetCollectDirectoryMetadata(self, collect_directory_metadata):
"""Sets the collect directory metadata flag.
Args:
collect_directory_metadata: Boolean value to indicate whether directory
metadata should be collected.
"""
self._collect_directory_metadata = collect_directory_metadata
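# Usage sketch (illustrative, not part of the original module), assuming a
# dfvfs OS path specification and the single process queue from
# plaso.engine.single_process:
#
#   from dfvfs.lib import definitions as dfvfs_definitions
#   from dfvfs.path import factory as path_spec_factory
#   from plaso.engine import single_process
#
#   path_spec = path_spec_factory.Factory.NewPathSpec(
#       dfvfs_definitions.TYPE_INDICATOR_OS, location=u'/tmp')
#   process_queue = single_process.SingleProcessQueue()
#   collector_object = Collector(process_queue, u'/tmp', path_spec)
#   collector_object.Collect()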
+354
View File
@@ -0,0 +1,354 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The unit tests for the generic collector object."""
import logging
import os
import shutil
import tempfile
import unittest
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.engine import collector
from plaso.engine import queue
from plaso.engine import single_process
from plaso.engine import utils as engine_utils
class TempDirectory(object):
"""A self cleaning temporary directory."""
def __init__(self):
"""Initializes the temporary directory."""
super(TempDirectory, self).__init__()
self.name = u''
def __enter__(self):
"""Make this work with the 'with' statement."""
self.name = tempfile.mkdtemp()
return self.name
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Make this work with the 'with' statement."""
shutil.rmtree(self.name, True)
class TestCollectorQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements a test collector queue consumer."""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(TestCollectorQueueConsumer, self).__init__(queue_object)
self.path_specs = []
def _ConsumeItem(self, path_spec):
"""Consumes an item callback for ConsumeItems.
Args:
path_spec: a path specification (instance of dfvfs.PathSpec).
"""
self.path_specs.append(path_spec)
@property
def number_of_path_specs(self):
"""The number of path specifications."""
return len(self.path_specs)
def GetFilePaths(self):
"""Retrieves a list of file paths from the path specifications."""
file_paths = []
for path_spec in self.path_specs:
location = getattr(path_spec, 'location', None)
if location is not None:
file_paths.append(location)
return file_paths
class CollectorTestCase(unittest.TestCase):
"""The collector test case."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data')
# Show full diff results, part of TestCase so does not follow our naming
# conventions.
maxDiff = None
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
A path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
class CollectorTest(CollectorTestCase):
"""Tests for the collector."""
def testFileSystemCollection(self):
"""Test collection on the file system."""
test_files = [
self._GetTestFilePath([u'syslog.tgz']),
self._GetTestFilePath([u'syslog.zip']),
self._GetTestFilePath([u'syslog.bz2']),
self._GetTestFilePath([u'wtmp.1'])]
with TempDirectory() as dirname:
for a_file in test_files:
shutil.copy(a_file, dirname)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, dirname, path_spec,
resolver_context=resolver_context)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)
def testFileSystemWithFilterCollection(self):
"""Test collection on the file system with a filter."""
dirname = u'.'
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
temp_file.write('/test_data/testdir/filter_.+.txt\n')
temp_file.write('/test_data/.+evtx\n')
temp_file.write('/AUTHORS\n')
temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, dirname, path_spec,
resolver_context=resolver_context)
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
test_collector.SetFilter(find_specs)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning((
u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
filter_name, exception))
# Two files matching test_data/testdir/filter_*.txt, one AUTHORS
# file and one test_data/System.evtx file; 4 path specifications in total.
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)
paths = test_collector_queue_consumer.GetFilePaths()
current_directory = os.getcwd()
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_1.txt')
self.assertTrue(expected_path in paths)
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_2.txt')
self.assertFalse(expected_path in paths)
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_3.txt')
self.assertTrue(expected_path in paths)
expected_path = os.path.join(
current_directory, u'AUTHORS')
self.assertTrue(expected_path in paths)
def testImageCollection(self):
"""Test collection on a storage media image file.
This image has two files:
+ logs/hidden.zip
+ logs/sys.tgz
The hidden.zip file contains one file, syslog, which is identical
to the one in sys.tgz.
The end results should therefore be:
+ logs/hidden.zip (unchanged)
+ logs/hidden.zip:syslog (the text file extracted out)
+ logs/sys.tgz (unchanged)
+ logs/sys.tgz (read as a GZIP file, so not compressed)
+ logs/sys.tgz:syslog.gz (A GZIP file from the TAR container)
+ logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)
This means that extraction yields 6 files in total. The collector itself,
however, only discovers the file system entries: the logs directory and
the two archive files, hence 3 path specifications; recursing into the
archives is left to the extraction workers.
"""
test_file = self._GetTestFilePath([u'syslog_image.dd'])
volume_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, test_file, path_spec,
resolver_context=resolver_context)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 3)
def testImageWithFilterCollection(self):
"""Test collection on a storage media image file with a filter."""
test_file = self._GetTestFilePath([u'ímynd.dd'])
volume_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
temp_file.write('/a_directory/.+zip\n')
temp_file.write('/a_directory/another.+\n')
temp_file.write('/passwords.txt\n')
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, test_file, path_spec,
resolver_context=resolver_context)
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
test_collector.SetFilter(find_specs)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning((
u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
filter_name, exception))
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2)
paths = test_collector_queue_consumer.GetFilePaths()
# path_specs[0]
# type: TSK
# file_path: '/a_directory/another_file'
# container_path: 'test_data/ímynd.dd'
# image_offset: 0
self.assertEquals(paths[0], u'/a_directory/another_file')
# path_specs[1]
# type: TSK
# file_path: '/passwords.txt'
# container_path: 'test_data/ímynd.dd'
# image_offset: 0
self.assertEquals(paths[1], u'/passwords.txt')
class BuildFindSpecsFromFileTest(unittest.TestCase):
"""Tests for the BuildFindSpecsFromFile function."""
def testBuildFindSpecsFromFile(self):
"""Tests the BuildFindSpecsFromFile function."""
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
# 2 hits.
temp_file.write('/test_data/testdir/filter_.+.txt\n')
# A single hit.
temp_file.write('/test_data/.+evtx\n')
# A single hit.
temp_file.write('/AUTHORS\n')
temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
# This should not compile properly, missing file information.
temp_file.write('failing/\n')
# This should not fail during initial loading, but fail later on.
temp_file.write('bad re (no close on that parenthesis/file\n')
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning(
u'Unable to remove temporary file: {0:s} with error: {1:s}'.format(
filter_name, exception))
self.assertEquals(len(find_specs), 4)
dirname = u'.'
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
searcher = file_system_searcher.FileSystemSearcher(
file_system, path_spec)
path_spec_generator = searcher.Find(find_specs=find_specs)
self.assertNotEquals(path_spec_generator, None)
path_specs = list(path_spec_generator)
# One evtx, one AUTHORS, two filter_*.txt files, total 4 files.
self.assertEquals(len(path_specs), 4)
with self.assertRaises(IOError):
_ = engine_utils.BuildFindSpecsFromFile('thisfiledoesnotexist')
if __name__ == '__main__':
unittest.main()
+319
View File
@@ -0,0 +1,319 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The processing engine."""
import abc
import logging
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.artifacts import knowledge_base
from plaso.engine import collector
from plaso.engine import queue
from plaso.lib import errors
from plaso.preprocessors import interface as preprocess_interface
from plaso.preprocessors import manager as preprocess_manager
class BaseEngine(object):
"""Class that defines the processing engine base."""
def __init__(self, collection_queue, storage_queue, parse_error_queue):
"""Initialize the engine object.
Args:
collection_queue: the collection queue object (instance of Queue).
storage_queue: the storage queue object (instance of Queue).
parse_error_queue: the parser error queue object (instance of Queue).
"""
self._collection_queue = collection_queue
self._enable_debug_output = False
self._enable_profiling = False
self._event_queue_producer = queue.ItemQueueProducer(storage_queue)
self._filter_object = None
self._mount_path = None
self._open_files = False
self._parse_error_queue = parse_error_queue
self._parse_error_queue_producer = queue.ItemQueueProducer(
parse_error_queue)
self._profiling_sample_rate = 1000
self._source = None
self._source_path_spec = None
self._source_file_entry = None
self._text_prepend = None
self.knowledge_base = knowledge_base.KnowledgeBase()
self.storage_queue = storage_queue
def CreateCollector(
self, include_directory_stat, vss_stores=None, filter_find_specs=None,
resolver_context=None):
"""Creates a collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
include_directory_stat: Boolean value to indicate whether directory
stat information should be collected.
vss_stores: Optional list of VSS stores to include in the collection,
where 1 represents the first store. Set to None if no
VSS stores should be processed. The default is None.
filter_find_specs: Optional list of filter find specifications (instances
of dfvfs.FindSpec). The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
A collector object (instance of Collector).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
collector_object = collector.Collector(
self._collection_queue, self._source, self._source_path_spec,
resolver_context=resolver_context)
collector_object.SetCollectDirectoryMetadata(include_directory_stat)
if vss_stores:
collector_object.SetVssInformation(vss_stores)
if filter_find_specs:
collector_object.SetFilter(filter_find_specs)
return collector_object
@abc.abstractmethod
def CreateExtractionWorker(self, worker_number):
"""Creates an extraction worker object.
Args:
worker_number: A number that identifies the worker.
Returns:
An extraction worker (instance of worker.ExtractionWorker).
"""
def GetSourceFileSystemSearcher(self, resolver_context=None):
"""Retrieves the file system searcher of the source.
Args:
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
The file system searcher object (instance of dfvfs.FileSystemSearcher).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
file_system = path_spec_resolver.Resolver.OpenFileSystem(
self._source_path_spec, resolver_context=resolver_context)
type_indicator = self._source_path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
mount_point = self._source_path_spec
else:
mount_point = self._source_path_spec.parent
return file_system_searcher.FileSystemSearcher(file_system, mount_point)
def PreprocessSource(self, platform, resolver_context=None):
"""Preprocesses the source and fills the preprocessing object.
Args:
platform: string that indicates the platform (operating system).
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
"""
searcher = self.GetSourceFileSystemSearcher(
resolver_context=resolver_context)
if not platform:
platform = preprocess_interface.GuessOS(searcher)
self.knowledge_base.platform = platform
preprocess_manager.PreprocessPluginsManager.RunPlugins(
platform, searcher, self.knowledge_base)
def SetEnableDebugOutput(self, enable_debug_output):
"""Enables or disables debug output.
Args:
enable_debug_output: boolean value to indicate if the debug output
should be enabled.
"""
self._enable_debug_output = enable_debug_output
def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
"""Enables or disables profiling.
Args:
enable_profiling: boolean value to indicate if profiling should be
enabled.
profiling_sample_rate: optional integer indicating the profiling sample
rate. The value contains the number of files
processed. The default value is 1000.
"""
self._enable_profiling = enable_profiling
self._profiling_sample_rate = profiling_sample_rate
def SetFilterObject(self, filter_object):
"""Sets the filter object.
Args:
filter_object: the filter object (instance of objectfilter.Filter).
"""
self._filter_object = filter_object
def SetMountPath(self, mount_path):
"""Sets the mount path.
Args:
mount_path: string containing the mount path.
"""
self._mount_path = mount_path
# TODO: rename this mode.
def SetOpenFiles(self, open_files):
"""Sets the open files mode.
Args:
open_files: boolean value to indicate if the worker should scan for
file entries inside files.
"""
self._open_files = open_files
def SetSource(self, source_path_spec, resolver_context=None):
"""Sets the source.
Args:
source_path_spec: The source path specification (instance of
dfvfs.PathSpec) as determined by the file system
scanner.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Raises:
BadConfigOption: if the source cannot be set.
CollectorError: if the source path is not a device, file or directory.
"""
path_spec = source_path_spec
while path_spec.parent:
path_spec = path_spec.parent
# Note that source should be used for output purposes only.
self._source = getattr(path_spec, 'location', u'')
self._source_path_spec = source_path_spec
self._source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
self._source_path_spec, resolver_context=resolver_context)
if not self._source_file_entry:
raise errors.BadConfigOption(
u'No such device, file or directory: {0:s}.'.format(self._source))
if (not self._source_file_entry.IsDirectory() and
not self._source_file_entry.IsFile() and
not self._source_file_entry.IsDevice()):
raise errors.CollectorError(
u'Source path: {0:s} not a device, file or directory.'.format(
self._source))
if self._source_path_spec.type_indicator in [
dfvfs_definitions.TYPE_INDICATOR_OS,
dfvfs_definitions.TYPE_INDICATOR_FAKE]:
if self._source_file_entry.IsFile():
logging.debug(u'Starting a collection on a single file.')
# No need for multiple workers when parsing a single file.
elif not self._source_file_entry.IsDirectory():
raise errors.BadConfigOption(
u'Source: {0:s} has to be a file or directory.'.format(
self._source))
# TODO: remove this functionality.
def SetTextPrepend(self, text_prepend):
"""Sets the text prepend.
Args:
text_prepend: string that contains the text to prepend to every
event object.
"""
self._text_prepend = text_prepend
def SignalAbort(self):
"""Signals the engine to abort."""
logging.warning(u'Signalled abort.')
self._event_queue_producer.SignalEndOfInput()
self._parse_error_queue_producer.SignalEndOfInput()
def SignalEndOfInputStorageQueue(self):
"""Signals the storage queue no input remains."""
self._event_queue_producer.SignalEndOfInput()
self._parse_error_queue_producer.SignalEndOfInput()
def SourceIsDirectory(self):
"""Determines if the source is a directory.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_file_entry:
raise RuntimeError(u'Missing source.')
return (not self.SourceIsStorageMediaImage() and
self._source_file_entry.IsDirectory())
def SourceIsFile(self):
"""Determines if the source is a file.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_file_entry:
raise RuntimeError(u'Missing source.')
return (not self.SourceIsStorageMediaImage() and
self._source_file_entry.IsFile())
def SourceIsStorageMediaImage(self):
"""Determines if the source is storage media image file or device.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
return self._source_path_spec.type_indicator not in [
dfvfs_definitions.TYPE_INDICATOR_OS,
dfvfs_definitions.TYPE_INDICATOR_FAKE]
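# Usage sketch (illustrative, not part of the original module): a concrete
# subclass, such as the single process engine, is typically driven along
# these lines, where engine_object is a placeholder for such an instance:
#
#   from dfvfs.lib import definitions as dfvfs_definitions
#   from dfvfs.path import factory as path_spec_factory
#
#   source_path_spec = path_spec_factory.Factory.NewPathSpec(
#       dfvfs_definitions.TYPE_INDICATOR_OS, location=u'/tmp')
#   engine_object.SetSource(source_path_spec)
#   if engine_object.SourceIsDirectory():
#     collector_object = engine_object.CreateCollector(True)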
+204
View File
@@ -0,0 +1,204 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Queue management implementation for Plaso.
This file contains an implementation of a queue used by plaso for
queue management, together with the corresponding queue producer and
consumer interfaces.
The queue has been abstracted in order to support different queueing
mechanisms, such as multi processing, and to provide scalability.
"""
import abc
from plaso.lib import errors
class QueueEndOfInput(object):
"""Class that implements a queue end of input."""
class Queue(object):
"""Class that implements the queue interface."""
@abc.abstractmethod
def __len__(self):
"""Returns the estimated current number of items in the queue."""
@abc.abstractmethod
def IsEmpty(self):
"""Determines if the queue is empty."""
@abc.abstractmethod
def PushItem(self, item):
"""Pushes an item onto the queue."""
@abc.abstractmethod
def PopItem(self):
"""Pops an item off the queue."""
def SignalEndOfInput(self):
"""Signals the queue no input remains."""
self.PushItem(QueueEndOfInput())
class QueueConsumer(object):
"""Class that implements the queue consumer interface.
The consumer subscribes to updates on the queue.
"""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(QueueConsumer, self).__init__()
self._abort = False
self._queue = queue_object
def SignalAbort(self):
"""Signals the consumer to abort."""
self._abort = True
class QueueProducer(object):
"""Class that implements the queue producer interface.
The producer generates updates on the queue.
"""
def __init__(self, queue_object):
"""Initializes the queue producer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(QueueProducer, self).__init__()
self._abort = False
self._queue = queue_object
def SignalAbort(self):
"""Signals the producer to abort."""
self._abort = True
def SignalEndOfInput(self):
"""Signals the queue no input remains."""
self._queue.SignalEndOfInput()
class EventObjectQueueConsumer(QueueConsumer):
"""Class that implements the event object queue consumer.
The consumer subscribes to updates on the queue.
"""
@abc.abstractmethod
def _ConsumeEventObject(self, event_object, **kwargs):
"""Consumes an event object callback for ConsumeEventObjects."""
def ConsumeEventObjects(self, **kwargs):
"""Consumes the event object that are pushed on the queue.
This function will issue a callback to _ConsumeEventObject for every
event object (instance of EventObject) consumed from the queue.
Args:
kwargs: keyword arguments to pass to the _ConsumeEventObject callback.
"""
while not self._abort:
try:
item = self._queue.PopItem()
except errors.QueueEmpty:
break
if isinstance(item, QueueEndOfInput):
# Push the item back onto the queue to make sure all
# queue consumers are stopped.
self._queue.PushItem(item)
break
self._ConsumeEventObject(item, **kwargs)
self._abort = False
class ItemQueueConsumer(QueueConsumer):
"""Class that implements an item queue consumer.
The consumer subscribes to updates on the queue.
"""
@abc.abstractmethod
def _ConsumeItem(self, item):
"""Consumes an item callback for ConsumeItems.
Args:
item: the item object.
"""
def ConsumeItems(self):
"""Consumes the items that are pushed on the queue."""
while not self._abort:
try:
item = self._queue.PopItem()
except errors.QueueEmpty:
break
if isinstance(item, QueueEndOfInput):
# Push the item back onto the queue to make sure all
# queue consumers are stopped.
self._queue.PushItem(item)
break
self._ConsumeItem(item)
self._abort = False
class ItemQueueProducer(QueueProducer):
"""Class that implements an item queue producer.
The producer generates updates on the queue.
"""
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
return
def ProduceItem(self, item):
"""Produces an item onto the queue.
Args:
item: the item object.
"""
try:
self._queue.PushItem(item)
except errors.QueueFull:
self._FlushQueue()
def ProduceItems(self, items):
"""Produces items onto the queue.
Args:
items: a list or generator of item objects.
"""
for item in items:
self.ProduceItem(item)
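# Illustrative sketch (not part of the original module): a minimal consumer
# implementation and a matching producer; PrintingConsumer and queue_object
# are hypothetical names, where queue_object refers to a concrete Queue
# implementation such as plaso.engine.single_process.SingleProcessQueue:
#
#   class PrintingConsumer(ItemQueueConsumer):
#     """Item queue consumer that prints every consumed item."""
#
#     def _ConsumeItem(self, item):
#       print item
#
#   producer = ItemQueueProducer(queue_object)
#   producer.ProduceItems([u'item1', u'item2'])
#   producer.SignalEndOfInput()
#   PrintingConsumer(queue_object).ConsumeItems()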
+366
View File
@@ -0,0 +1,366 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The single process processing engine."""
import collections
import logging
import pdb
from plaso.engine import collector
from plaso.engine import engine
from plaso.engine import queue
from plaso.engine import worker
from plaso.lib import errors
from plaso.parsers import context as parsers_context
class SingleProcessCollector(collector.Collector):
"""Class that implements a single process collector object."""
def __init__(
self, process_queue, source_path, source_path_spec,
resolver_context=None):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
source_path: Path of the source file or directory.
source_path_spec: The source path specification (instance of
dfvfs.PathSpec) as determined by the file system
scanner.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None.
"""
super(SingleProcessCollector, self).__init__(
process_queue, source_path, source_path_spec,
resolver_context=resolver_context)
self._extraction_worker = None
self._fs_collector = SingleProcessFileSystemCollector(process_queue)
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
while not self._queue.IsEmpty():
logging.debug(u'Extraction worker started.')
self._extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
def SetExtractionWorker(self, extraction_worker):
"""Sets the extraction worker.
Args:
extraction_worker: the extraction worker object (instance of
EventExtractionWorker).
"""
self._extraction_worker = extraction_worker
self._fs_collector.SetExtractionWorker(extraction_worker)
class SingleProcessEngine(engine.BaseEngine):
"""Class that defines the single process engine."""
def __init__(self, maximum_number_of_queued_items=0):
"""Initialize the single process engine object.
Args:
maximum_number_of_queued_items: The maximum number of queued items.
The default is 0, which represents
no limit.
"""
collection_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
storage_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
parse_error_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
super(SingleProcessEngine, self).__init__(
collection_queue, storage_queue, parse_error_queue)
self._event_queue_producer = SingleProcessItemQueueProducer(storage_queue)
self._parse_error_queue_producer = SingleProcessItemQueueProducer(
parse_error_queue)
def CreateCollector(
self, include_directory_stat, vss_stores=None, filter_find_specs=None,
resolver_context=None):
"""Creates a collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
include_directory_stat: Boolean value to indicate whether directory
stat information should be collected.
vss_stores: Optional list of VSS stores to include in the collection,
where 1 represents the first store. Set to None if no
VSS stores should be processed. The default is None.
filter_find_specs: Optional list of filter find specifications (instances
of dfvfs.FindSpec). The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
A collector object (instance of Collector).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
collector_object = SingleProcessCollector(
self._collection_queue, self._source, self._source_path_spec,
resolver_context=resolver_context)
collector_object.SetCollectDirectoryMetadata(include_directory_stat)
if vss_stores:
collector_object.SetVssInformation(vss_stores)
if filter_find_specs:
collector_object.SetFilter(filter_find_specs)
return collector_object
def CreateExtractionWorker(self, worker_number):
"""Creates an extraction worker object.
Args:
worker_number: A number that identifies the worker.
Returns:
An extraction worker (instance of worker.ExtractionWorker).
"""
parser_context = parsers_context.ParserContext(
self._event_queue_producer, self._parse_error_queue_producer,
self.knowledge_base)
extraction_worker = SingleProcessEventExtractionWorker(
worker_number, self._collection_queue, self._event_queue_producer,
self._parse_error_queue_producer, parser_context)
extraction_worker.SetEnableDebugOutput(self._enable_debug_output)
# TODO: move profiler in separate object.
extraction_worker.SetEnableProfiling(
self._enable_profiling,
profiling_sample_rate=self._profiling_sample_rate)
if self._open_files:
extraction_worker.SetOpenFiles(self._open_files)
if self._filter_object:
extraction_worker.SetFilterObject(self._filter_object)
if self._mount_path:
extraction_worker.SetMountPath(self._mount_path)
if self._text_prepend:
extraction_worker.SetTextPrepend(self._text_prepend)
return extraction_worker
def ProcessSource(
self, collector_object, storage_writer, parser_filter_string=None):
"""Processes the source and extracts event objects.
Args:
collector_object: A collector object (instance of Collector).
storage_writer: A storage writer object (instance of BaseStorageWriter).
parser_filter_string: Optional parser filter string. The default is None.
"""
extraction_worker = self.CreateExtractionWorker(0)
extraction_worker.InitalizeParserObjects(
parser_filter_string=parser_filter_string)
# Set the extraction worker and storage writer values so that they
# can be accessed if the QueueFull exception is raised. This is
# needed in single process mode to prevent the queue consuming too
# much memory.
collector_object.SetExtractionWorker(extraction_worker)
self._event_queue_producer.SetStorageWriter(storage_writer)
self._parse_error_queue_producer.SetStorageWriter(storage_writer)
logging.debug(u'Processing started.')
logging.debug(u'Collection started.')
collector_object.Collect()
logging.debug(u'Collection stopped.')
logging.debug(u'Extraction worker started.')
extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
self._event_queue_producer.SignalEndOfInput()
logging.debug(u'Storage writer started.')
storage_writer.WriteEventObjects()
logging.debug(u'Storage writer stopped.')
# Reset the extraction worker and storage writer values to return
# the objects in their original state. This will prevent access
# to the extraction worker outside this function and allow it
# to be garbage collected.
self._event_queue_producer.SetStorageWriter(None)
self._parse_error_queue_producer.SetStorageWriter(None)
collector_object.SetExtractionWorker(None)
logging.debug(u'Processing completed.')
class SingleProcessEventExtractionWorker(worker.BaseEventExtractionWorker):
"""Class that defines the single process event extraction worker."""
def _DebugParseFileEntry(self):
"""Callback for debugging file entry parsing failures."""
pdb.post_mortem()
class SingleProcessFileSystemCollector(collector.FileSystemCollector):
"""Class that implements a single process file system collector object."""
def __init__(self, process_queue):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
"""
super(SingleProcessFileSystemCollector, self).__init__(process_queue)
self._extraction_worker = None
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
while not self._queue.IsEmpty():
logging.debug(u'Extraction worker started.')
self._extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
def SetExtractionWorker(self, extraction_worker):
"""Sets the extraction worker.
Args:
extraction_worker: the extraction worker object (instance of
EventExtractionWorker).
"""
self._extraction_worker = extraction_worker
class SingleProcessItemQueueProducer(queue.ItemQueueProducer):
"""Class that implements a single process item queue producer."""
def __init__(self, queue_object):
"""Initializes the queue producer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(SingleProcessItemQueueProducer, self).__init__(queue_object)
self._storage_writer = None
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
logging.debug(u'Storage writer started.')
self._storage_writer.WriteEventObjects()
logging.debug(u'Storage writer stopped.')
def SetStorageWriter(self, storage_writer):
"""Sets the storage writer.
Args:
storage_writer: the storage writer object (instance of
BaseStorageWriter).
"""
self._storage_writer = storage_writer
class SingleProcessQueue(queue.Queue):
"""Single process queue."""
def __init__(self, maximum_number_of_queued_items=0):
"""Initializes a single process queue object.
Args:
maximum_number_of_queued_items: The maximum number of queued items.
The default is 0, which represents
no limit.
"""
super(SingleProcessQueue, self).__init__()
# The Queue interface defines the maximum number of queued items to be
# 0 if unlimited as does the multi processing queue, but deque uses
# None to indicate no limit.
if maximum_number_of_queued_items == 0:
maximum_number_of_queued_items = None
# maxlen contains the maximum number of items allowed to be queued,
# where None represents unlimited.
self._queue = collections.deque(
maxlen=maximum_number_of_queued_items)
def __len__(self):
"""Returns the estimated current number of items in the queue."""
return len(self._queue)
def IsEmpty(self):
"""Determines if the queue is empty."""
return len(self._queue) == 0
def PushItem(self, item):
"""Pushes an item onto the queue.
Raises:
QueueFull: when the queue is full.
"""
number_of_items = len(self._queue)
# Deque will drop the first item in the queue when maxlen is exceeded.
if not self._queue.maxlen or number_of_items < self._queue.maxlen:
self._queue.append(item)
number_of_items += 1
if self._queue.maxlen and number_of_items == self._queue.maxlen:
raise errors.QueueFull
def PopItem(self):
"""Pops an item off the queue.
Raises:
QueueEmpty: when the queue is empty.
"""
try:
# Using popleft to have FIFO behavior.
return self._queue.popleft()
except IndexError:
raise errors.QueueEmpty
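# Usage sketch (illustrative, not part of the original module): the queue
# behaves as a FIFO and raises QueueFull once the maximum size is reached;
# note that the item that fills the queue is still appended:
#
#   queue_object = SingleProcessQueue(maximum_number_of_queued_items=2)
#   queue_object.PushItem(u'first')
#   try:
#     queue_object.PushItem(u'second')
#   except errors.QueueFull:
#     pass
#   print queue_object.PopItem()
#
# This prints: first, since PopItem pops from the front of the queue.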
+133
View File
@@ -0,0 +1,133 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests the single process processing engine."""
import os
import unittest
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.helpers import file_system_searcher
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context
from plaso.engine import single_process
from plaso.engine import test_lib
from plaso.lib import errors
class SingleProcessQueueTest(unittest.TestCase):
"""Tests the single process queue."""
_ITEMS = frozenset(['item1', 'item2', 'item3', 'item4'])
def testPushPopItem(self):
"""Tests the PushItem and PopItem functions."""
test_queue = single_process.SingleProcessQueue()
for item in self._ITEMS:
test_queue.PushItem(item)
self.assertEquals(len(test_queue), len(self._ITEMS))
test_queue.SignalEndOfInput()
test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
test_queue_consumer.ConsumeItems()
expected_number_of_items = len(self._ITEMS)
self.assertEquals(
test_queue_consumer.number_of_items, expected_number_of_items)
def testQueueEmpty(self):
"""Tests the queue raises the QueueEmpty exception."""
test_queue = single_process.SingleProcessQueue()
with self.assertRaises(errors.QueueEmpty):
test_queue.PopItem()
def testQueueFull(self):
"""Tests the queue raises the QueueFull exception."""
test_queue = single_process.SingleProcessQueue(
maximum_number_of_queued_items=5)
for item in self._ITEMS:
test_queue.PushItem(item)
with self.assertRaises(errors.QueueFull):
test_queue.PushItem('item5')
with self.assertRaises(errors.QueueFull):
test_queue.PushItem('item6')
test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
test_queue_consumer.ConsumeItems()
expected_number_of_items = len(self._ITEMS)
self.assertEquals(
test_queue_consumer.number_of_items, expected_number_of_items + 1)
class SingleProcessEngineTest(unittest.TestCase):
"""Tests for the engine object."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data')
def testEngine(self):
"""Test the engine functionality."""
resolver_context = context.Context()
test_engine = single_process.SingleProcessEngine(
maximum_number_of_queued_items=25000)
self.assertNotEquals(test_engine, None)
source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
os_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
source_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=os_path_spec)
test_engine.SetSource(source_path_spec, resolver_context=resolver_context)
self.assertFalse(test_engine.SourceIsDirectory())
self.assertFalse(test_engine.SourceIsFile())
self.assertTrue(test_engine.SourceIsStorageMediaImage())
test_searcher = test_engine.GetSourceFileSystemSearcher(
resolver_context=resolver_context)
self.assertNotEquals(test_searcher, None)
self.assertIsInstance(
test_searcher, file_system_searcher.FileSystemSearcher)
test_engine.PreprocessSource('Windows')
test_collector = test_engine.CreateCollector(
False, vss_stores=None, filter_find_specs=None,
resolver_context=resolver_context)
self.assertNotEquals(test_collector, None)
self.assertIsInstance(
test_collector, single_process.SingleProcessCollector)
test_extraction_worker = test_engine.CreateExtractionWorker(0)
self.assertNotEquals(test_extraction_worker, None)
self.assertIsInstance(
test_extraction_worker,
single_process.SingleProcessEventExtractionWorker)
if __name__ == '__main__':
unittest.main()
+71
View File
@@ -0,0 +1,71 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine related functions and classes for testing."""
import os
import unittest
from plaso.engine import queue
class TestQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements the test queue consumer.
The queue consumer subscribes to updates on the queue.
"""
def __init__(self, test_queue):
"""Initializes the queue consumer.
Args:
test_queue: the test queue (instance of Queue).
"""
super(TestQueueConsumer, self).__init__(test_queue)
self.items = []
def _ConsumeItem(self, item):
"""Consumes an item callback for ConsumeItems."""
self.items.append(item)
@property
def number_of_items(self):
"""The number of items."""
return len(self.items)
class EngineTestCase(unittest.TestCase):
"""The unit test case for a front-end."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data')
# Show full diff results, part of TestCase so does not follow our naming
# conventions.
maxDiff = None
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
A path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
+75
View File
@@ -0,0 +1,75 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine utility functions."""
import logging
from dfvfs.helpers import file_system_searcher
from plaso.winreg import path_expander
def BuildFindSpecsFromFile(filter_file_path, pre_obj=None):
"""Returns a list of find specification from a filter file.
Args:
filter_file_path: A path to a file that contains find specifications.
pre_obj: A preprocessing object (instance of PreprocessObject). This is
optional but when provided takes care of expanding each segment.
"""
find_specs = []
if pre_obj:
expander = path_expander.WinRegistryKeyPathExpander()
with open(filter_file_path, 'rb') as file_object:
for line in file_object:
line = line.strip()
if line.startswith(u'#'):
continue
if pre_obj:
try:
line = expander.ExpandPath(line, pre_obj=pre_obj)
except KeyError as exception:
logging.error((
u'Unable to use collection filter line: {0:s} with error: '
u'{1:s}').format(line, exception))
continue
if not line.startswith(u'/'):
logging.warning((
u'The filter string must be defined as an absolute path: '
u'{0:s}').format(line))
continue
_, _, file_path = line.rstrip().rpartition(u'/')
if not file_path:
logging.warning(
u'Unable to parse the filter string: {0:s}'.format(line))
continue
# Convert the filter paths into a list of path segments and strip
# the root path segment.
path_segments = line.split(u'/')
path_segments.pop(0)
find_specs.append(file_system_searcher.FindSpec(
location_regex=path_segments, case_sensitive=False))
return find_specs
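# Usage sketch (illustrative, not part of the original module): a filter
# file contains one absolute path per line, where every path segment is
# matched as a case insensitive regular expression and lines starting
# with # are ignored, e.g. a filter file containing:
#
#   /AUTHORS
#   /test_data/.+evtx
#
# is turned into find specifications with:
#
#   find_specs = BuildFindSpecsFromFile(u'/tmp/filter.txt')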
+352
View File
@@ -0,0 +1,352 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The event extraction worker."""
import logging
import os
from dfvfs.resolver import context
from dfvfs.resolver import resolver as path_spec_resolver
try:
from guppy import hpy
except ImportError:
hpy = None
from plaso.engine import classifier
from plaso.engine import queue
from plaso.lib import errors
from plaso.parsers import manager as parsers_manager
class BaseEventExtractionWorker(queue.ItemQueueConsumer):
"""Class that defines the event extraction worker base.
This class is designed to watch a queue for path specifications of files
and directories (file entries) for which events need to be extracted.
The event extraction worker needs to determine if a parser suitable
for parsing a particular file is available. All extracted event objects
are pushed on a storage queue for further processing.
"""
def __init__(
self, identifier, process_queue, event_queue_producer,
parse_error_queue_producer, parser_context):
"""Initializes the event extraction worker object.
Args:
identifier: The identifier, usually an incrementing integer.
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
event_queue_producer: The event object queue producer (instance of
ItemQueueProducer).
parse_error_queue_producer: The parse error queue producer (instance of
ItemQueueProducer).
parser_context: A parser context object (instance of ParserContext).
"""
super(BaseEventExtractionWorker, self).__init__(process_queue)
self._enable_debug_output = False
self._identifier = identifier
self._open_files = False
self._parser_context = parser_context
self._filestat_parser_object = None
self._parser_objects = None
# We need a resolver context per process to prevent multi processing
# issues with file objects stored in images.
self._resolver_context = context.Context()
self._event_queue_producer = event_queue_producer
self._parse_error_queue_producer = parse_error_queue_producer
# Attributes that contain the current status of the worker.
self._current_working_file = u''
self._is_running = False
# Attributes for profiling.
self._enable_profiling = False
self._heapy = None
self._profiling_sample = 0
self._profiling_sample_rate = 1000
self._profiling_sample_file = u'{0!s}.hpy'.format(self._identifier)
def _ConsumeItem(self, path_spec):
"""Consumes an item callback for ConsumeItems.
Args:
path_spec: a path specification (instance of dfvfs.PathSpec).
"""
file_entry = path_spec_resolver.Resolver.OpenFileEntry(
path_spec, resolver_context=self._resolver_context)
if file_entry is None:
logging.warning(u'Unable to open file entry: {0:s}'.format(
path_spec.comparable))
return
try:
self.ParseFileEntry(file_entry)
except IOError as exception:
logging.warning(u'Unable to parse file: {0:s} with error: {1:s}'.format(
path_spec.comparable, exception))
def _DebugParseFileEntry(self):
"""Callback for debugging file entry parsing failures."""
return
def _ParseFileEntryWithParser(self, parser_object, file_entry):
"""Parses a file entry with a specific parser.
Args:
parser_object: A parser object (instance of BaseParser).
file_entry: A file entry object (instance of dfvfs.FileEntry).
Raises:
QueueFull: If a queue is full.
"""
try:
parser_object.Parse(self._parser_context, file_entry)
except errors.UnableToParseFile as exception:
logging.debug(u'Not a {0:s} file ({1:s}) - {2:s}'.format(
parser_object.NAME, file_entry.name, exception))
except errors.QueueFull:
raise
except IOError as exception:
logging.debug(
u'[{0:s}] Unable to parse: {1:s} with error: {2:s}'.format(
parser_object.NAME, file_entry.path_spec.comparable,
exception))
# Casting a wide net, catching all exceptions. Done to keep the worker
# running, despite the parser hitting errors, so the worker doesn't die
# if a single file is corrupted or there is a bug in a parser.
except Exception as exception:
logging.warning(
u'[{0:s}] Unable to process file: {1:s} with error: {2:s}.'.format(
parser_object.NAME, file_entry.path_spec.comparable,
exception))
logging.debug(
u'The path specification that caused the error: {0:s}'.format(
file_entry.path_spec.comparable))
logging.exception(exception)
if self._enable_debug_output:
self._DebugParseFileEntry()
def _ProfilingStart(self):
"""Starts the profiling."""
self._heapy.setrelheap()
self._profiling_sample = 0
try:
os.remove(self._profiling_sample_file)
except OSError:
pass
def _ProfilingStop(self):
"""Stops the profiling."""
self._ProfilingWriteSample()
def _ProfilingUpdate(self):
"""Updates the profiling."""
self._profiling_sample += 1
if self._profiling_sample >= self._profiling_sample_rate:
self._ProfilingWriteSample()
self._profiling_sample = 0
def _ProfilingWriteSample(self):
"""Writes a profiling sample to the sample file."""
heap = self._heapy.heap()
heap.dump(self._profiling_sample_file)
def GetStatus(self):
"""Returns a status dictionary."""
return {
'is_running': self._is_running,
'identifier': u'Worker_{0:d}'.format(self._identifier),
'current_file': self._current_working_file,
'counter': self._parser_context.number_of_events}
def InitalizeParserObjects(self, parser_filter_string=None):
"""Initializes the parser objects.
The parser_filter_string is a simple comma separated value string that
denotes a list of parser names to include and/or exclude. Each entry
can have the value of:
+ Exact match of a list of parsers, or a preset (see
plaso/frontend/presets.py for a full list of available presets).
+ A name of a single parser (case insensitive), e.g. msiecfparser.
+ A glob name for a single parser, e.g. '*msie*' (case insensitive).
Args:
parser_filter_string: Optional parser filter string. The default is None.
"""
self._parser_objects = parsers_manager.ParsersManager.GetParserObjects(
parser_filter_string=parser_filter_string)
for parser_object in self._parser_objects:
if parser_object.NAME == 'filestat':
self._filestat_parser_object = parser_object
break
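# Illustrative sketch (not part of the original module): initializing the
# parser objects with a filter string, using the filestat parser name that
# appears above; worker_object is a placeholder for a worker instance:
#
#   worker_object.InitalizeParserObjects(parser_filter_string=u'filestat')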
def ParseFileEntry(self, file_entry):
"""Parses a file entry.
Args:
file_entry: A file entry object (instance of dfvfs.FileEntry).
"""
logging.debug(u'[ParseFileEntry] Parsing: {0:s}'.format(
file_entry.path_spec.comparable))
self._current_working_file = getattr(
file_entry.path_spec, u'location', file_entry.name)
if file_entry.IsDirectory() and self._filestat_parser_object:
self._ParseFileEntryWithParser(self._filestat_parser_object, file_entry)
elif file_entry.IsFile():
# TODO: Not go through all parsers, just the ones
# that the classifier classifies the file as.
for parser_object in self._parser_objects:
logging.debug(u'Trying to parse: {0:s} with parser: {1:s}'.format(
file_entry.name, parser_object.NAME))
self._ParseFileEntryWithParser(parser_object, file_entry)
logging.debug(u'[ParseFileEntry] Done parsing: {0:s}'.format(
file_entry.path_spec.comparable))
if self._enable_profiling:
self._ProfilingUpdate()
if self._open_files:
try:
for sub_file_entry in classifier.Classifier.SmartOpenFiles(file_entry):
if self._abort:
break
self.ParseFileEntry(sub_file_entry)
except IOError as exception:
logging.warning(
u'Unable to parse file: {0:s} with error: {1:s}'.format(
file_entry.path_spec.comparable, exception))
def Run(self):
"""Extracts event objects from file entries."""
self._parser_context.ResetCounters()
if self._enable_profiling:
self._ProfilingStart()
self._is_running = True
logging.info(
u'Worker {0:d} (PID: {1:d}) started monitoring process queue.'.format(
self._identifier, os.getpid()))
self.ConsumeItems()
logging.info(
u'Worker {0:d} (PID: {1:d}) stopped monitoring process queue.'.format(
self._identifier, os.getpid()))
self._current_working_file = u''
self._is_running = False
if self._enable_profiling:
self._ProfilingStop()
self._resolver_context.Empty()
def SetEnableDebugOutput(self, enable_debug_output):
"""Enables or disables debug output.
Args:
enable_debug_output: boolean value to indicate if the debug output
should be enabled.
"""
self._enable_debug_output = enable_debug_output
def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
"""Enables or disables profiling.
Args:
enable_profiling: boolean value to indicate if profiling should be
enabled.
profiling_sample_rate: optional integer indicating the profiling sample
rate. The value contains the number of files
processed. The default value is 1000.
"""
if hpy:
self._enable_profiling = enable_profiling
self._profiling_sample_rate = profiling_sample_rate
if self._enable_profiling and not self._heapy:
self._heapy = hpy()
def SetFilterObject(self, filter_object):
"""Sets the filter object.
Args:
filter_object: the filter object (instance of objectfilter.Filter).
"""
self._parser_context.SetFilterObject(filter_object)
def SetMountPath(self, mount_path):
"""Sets the mount path.
Args:
mount_path: string containing the mount path.
"""
self._parser_context.SetMountPath(mount_path)
# TODO: rename this mode.
def SetOpenFiles(self, open_files):
"""Sets the open files mode.
Args:
open_files: boolean value to indicate if the worker should scan for
file entries inside files.
"""
self._open_files = open_files
def SetTextPrepend(self, text_prepend):
"""Sets the text prepend.
Args:
text_prepend: string that contains the text to prepend to every
event object.
"""
self._parser_context.SetTextPrepend(text_prepend)
def SignalAbort(self):
"""Signals the worker to abort."""
super(BaseEventExtractionWorker, self).SignalAbort()
self._parser_context.SignalAbort()
@classmethod
def SupportsProfiling(cls):
"""Returns a boolean value to indicate if profiling is supported."""
return hpy is not None
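# Illustrative usage sketch (not part of the original module): how a
# front-end might configure and drive this worker. The constructor
# arguments and queue objects shown here are assumptions for the example,
# not the actual engine wiring.
#
#   worker = BaseEventExtractionWorker(
#       0, process_queue, event_queue_producer, parse_error_queue_producer,
#       parser_context)
#   worker.InitializeParserObjects(parser_filter_string=u'*msie*')
#   if BaseEventExtractionWorker.SupportsProfiling():
#     worker.SetEnableProfiling(True, profiling_sample_rate=500)
#   worker.SetOpenFiles(True)
#   worker.Run()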
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+92
View File
@@ -0,0 +1,92 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file is the template for Plist events."""
from plaso.events import time_events
from plaso.lib import eventdata
class PlistEvent(time_events.PythonDatetimeEvent):
"""Convenience class for a plist events."""
DATA_TYPE = 'plist:key'
def __init__(self, root, key, timestamp, desc=None, host=None, user=None):
"""Template for creating a Plist EventObject for returning data to Plaso.
All events extracted from files get passed around Plaso internally as an
EventObject. PlistEvent is an EventObject with attributes specifically
relevant to data extracted from a Plist file. The attribute DATA_TYPE
'plist:key' allows the formatter used during output to identify
the appropriate formatter for converting these attributes to output.
Args:
root: A string representing the path from the root to this key.
key: A string representing the name of key.
timestamp: The date object (instance of datetime.datetime).
desc: An optional string intended for the user describing the event.
host: An optional host name if one is available within the log file.
user: An optional user name if one is available within the log file.
"""
super(PlistEvent, self).__init__(
timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
self.root = root
self.key = key
if desc:
self.desc = desc
if host:
self.hostname = host
if user:
self.username = user
class PlistTimeEvent(time_events.TimestampEvent):
"""Convenience class for a plist event that does not use datetime objects."""
DATA_TYPE = 'plist:key'
def __init__(self, root, key, timestamp, desc=None, host=None, user=None):
"""Template for creating a Plist EventObject for returning data to Plaso.
All events extracted from files get passed around Plaso internally as an
EventObject. PlistTimeEvent is an EventObject with attributes specifically
relevant to data extracted from a Plist file. The attribute DATA_TYPE
'plist:key' allows the formatter used during output to identify
the appropriate formatter for converting these attributes to output.
Args:
root: A string representing the path from the root to this key.
key: A string representing the name of key.
timestamp: The timestamp time value. The timestamp contains the
number of microseconds since Jan 1, 1970 00:00:00 UTC.
desc: An optional string intended for the user describing the event.
host: An optional host name if one is available within the log file.
user: An optional user name if one is available within the log file.
"""
super(PlistTimeEvent, self).__init__(
timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
self.root = root
self.key = key
if desc:
self.desc = desc
if host:
self.hostname = host
if user:
self.username = user
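# Illustrative usage sketch (not part of the original module): constructing
# a PlistEvent from a parsed plist key. The root, key and datetime values
# are made up for the example.
#
#   import datetime
#
#   last_updated = datetime.datetime(2013, 7, 1, 12, 0, 0)
#   event_object = PlistEvent(
#       u'/DeviceCache/44-00-00-00-00-00', u'LastInquiryUpdate',
#       last_updated, desc=u'Last inquiry update')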
+50
View File
@@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the shell item specific event object classes."""
from plaso.events import time_events
class ShellItemFileEntryEvent(time_events.FatDateTimeEvent):
"""Convenience class for a shell item file entry event."""
DATA_TYPE = 'windows:shell_item:file_entry'
def __init__(
self, fat_date_time, usage, name, long_name, localized_name,
file_reference, origin):
"""Initializes an event object.
Args:
fat_date_time: The FAT date time value.
usage: The description of the usage of the time value.
name: A string containing the name of the file entry shell item.
long_name: A string containing the long name of the file entry shell item.
localized_name: A string containing the localized name of the file entry
shell item.
file_reference: A string containing the NTFS file reference
(MFT entry - sequence number).
origin: A string containing the origin of the event (event source).
"""
super(ShellItemFileEntryEvent, self).__init__(fat_date_time, usage)
self.name = name
self.long_name = long_name
self.localized_name = localized_name
self.file_reference = file_reference
self.origin = origin
+48
View File
@@ -0,0 +1,48 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the text format specific event object classes."""
from plaso.events import time_events
from plaso.lib import eventdata
class TextEvent(time_events.TimestampEvent):
"""Convenience class for a text format-based event."""
DATA_TYPE = 'text:entry'
def __init__(self, timestamp, offset, attributes):
"""Initializes a text event object.
Args:
timestamp: The timestamp time value. The timestamp contains the
number of microseconds since Jan 1, 1970 00:00:00 UTC.
offset: The offset of the attributes.
attributes: A dict that contains the event's attributes.
"""
super(TextEvent, self).__init__(
timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
self.offset = offset
for name, value in attributes.iteritems():
# TODO: Revisit these constraints and see if we can implement
# them using a more sane solution.
if isinstance(value, basestring) and not value:
continue
setattr(self, name, value)
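# Illustrative usage sketch (not part of the original module): empty string
# attribute values are skipped, so only non-empty values are set on the
# event. The timestamp and attribute values are made up for the example.
#
#   attributes = {u'hostname': u'myhost', u'username': u''}
#   event_object = TextEvent(1354563195000000, 0, attributes)
#   # event_object.hostname == u'myhost'; no username attribute is set
#   # because the empty string value was skipped.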
+157
View File
@@ -0,0 +1,157 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the time-based event object classes."""
from plaso.lib import event
from plaso.lib import timelib
class TimestampEvent(event.EventObject):
"""Convenience class for a timestamp-based event."""
def __init__(self, timestamp, usage, data_type=None):
"""Initializes an event object.
Args:
timestamp: The timestamp value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(TimestampEvent, self).__init__()
self.timestamp = timestamp
self.timestamp_desc = usage
if data_type:
self.data_type = data_type
class CocoaTimeEvent(TimestampEvent):
"""Convenience class for a Cocoa time-based event."""
def __init__(self, cocoa_time, usage, data_type=None):
"""Initializes an event object.
Args:
cocoa_time: The Cocoa time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(CocoaTimeEvent, self).__init__(
timelib.Timestamp.FromCocoaTime(cocoa_time), usage,
data_type=data_type)
class FatDateTimeEvent(TimestampEvent):
"""Convenience class for a FAT date time-based event."""
def __init__(self, fat_date_time, usage, data_type=None):
"""Initializes an event object.
Args:
fat_date_time: The FAT date time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(FatDateTimeEvent, self).__init__(
timelib.Timestamp.FromFatDateTime(fat_date_time), usage,
data_type=data_type)
class FiletimeEvent(TimestampEvent):
"""Convenience class for a FILETIME timestamp-based event."""
def __init__(self, filetime, usage, data_type=None):
"""Initializes an event object.
Args:
filetime: The FILETIME timestamp value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(FiletimeEvent, self).__init__(
timelib.Timestamp.FromFiletime(filetime), usage, data_type=data_type)
class JavaTimeEvent(TimestampEvent):
"""Convenience class for a Java time-based event."""
def __init__(self, java_time, usage, data_type=None):
"""Initializes an event object.
Args:
java_time: The Java time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(JavaTimeEvent, self).__init__(
timelib.Timestamp.FromJavaTime(java_time), usage, data_type=data_type)
class PosixTimeEvent(TimestampEvent):
"""Convenience class for a POSIX time-based event."""
def __init__(self, posix_time, usage, data_type=None):
"""Initializes an event object.
Args:
posix_time: The POSIX time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(PosixTimeEvent, self).__init__(
timelib.Timestamp.FromPosixTime(posix_time), usage, data_type=data_type)
class PythonDatetimeEvent(TimestampEvent):
"""Convenience class for a Python DateTime time-based event."""
def __init__(self, datetime_time, usage, data_type=None):
"""Initializes an event object.
Args:
datetime_time: The datetime object (instance of datetime.datetime).
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(PythonDatetimeEvent, self).__init__(
timelib.Timestamp.FromPythonDatetime(datetime_time), usage,
data_type=data_type)
class WebKitTimeEvent(TimestampEvent):
"""Convenience class for a WebKit time-based event."""
def __init__(self, webkit_time, usage, data_type=None):
"""Initializes an event object.
Args:
webkit_time: The WebKit time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(WebKitTimeEvent, self).__init__(
timelib.Timestamp.FromWebKitTime(webkit_time), usage,
data_type=data_type)
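# Illustrative usage sketch (not part of the original module): each
# convenience subclass converts its native time value into the internal
# timestamp, here a POSIX time of 2012-01-01 00:00:00 UTC. The usage
# description constant comes from plaso.lib.eventdata, which this module
# itself does not import.
#
#   from plaso.lib import eventdata
#
#   event_object = PosixTimeEvent(
#       1325376000, eventdata.EventTimestamp.WRITTEN_TIME,
#       data_type=u'text:entry')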
+95
View File
@@ -0,0 +1,95 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the Windows specific event object classes."""
from plaso.events import time_events
from plaso.lib import eventdata
class WindowsVolumeCreationEvent(time_events.FiletimeEvent):
"""Convenience class for a Windows volume creation event."""
DATA_TYPE = 'windows:volume:creation'
def __init__(self, filetime, device_path, serial_number, origin):
"""Initializes an event object.
Args:
filetime: The FILETIME timestamp value.
device_path: A string containing the volume device path.
serial_number: A string containing the volume serial number.
origin: A string containing the origin of the event (event source).
"""
super(WindowsVolumeCreationEvent, self).__init__(
filetime, eventdata.EventTimestamp.CREATION_TIME)
self.device_path = device_path
self.serial_number = serial_number
self.origin = origin
class WindowsRegistryEvent(time_events.TimestampEvent):
"""Convenience class for a Windows Registry-based event."""
DATA_TYPE = 'windows:registry:key_value'
def __init__(
self, timestamp, key_name, value_dict, usage=None, offset=None,
registry_type=None, urls=None, source_append=None):
"""Initializes a Windows registry event.
Args:
timestamp: The timestamp time value. The timestamp contains the
number of microseconds since Jan 1, 1970 00:00:00 UTC.
key_name: The name of the Registry key being parsed.
value_dict: The interpreted value of the key, stored as a dictionary.
usage: Optional description of the usage of the time value.
The default is None.
offset: Optional (data) offset of the Registry key or value.
The default is None.
registry_type: Optional Registry type string. The default is None.
urls: Optional list of URLs. The default is None.
source_append: Optional string to append to the source_long of the event.
The default is None.
"""
if usage is None:
usage = eventdata.EventTimestamp.WRITTEN_TIME
super(WindowsRegistryEvent, self).__init__(timestamp, usage)
if key_name:
self.keyname = key_name
self.regvalue = value_dict
if offset or isinstance(offset, (int, long)):
self.offset = offset
if registry_type:
self.registry_type = registry_type
if urls:
self.url = u' - '.join(urls)
if source_append:
self.source_append = source_append
class WindowsRegistryServiceEvent(WindowsRegistryEvent):
"""Convenience class for service entries retrieved from the registry."""
DATA_TYPE = 'windows:registry:service'
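# Illustrative usage sketch (not part of the original module): creating a
# Registry event from a parsed key. The key name, values and timestamp are
# made up for the example.
#
#   regvalue = {u'Version': u'1.0', u'Owner': u'N/A'}
#   event_object = WindowsRegistryEvent(
#       1354563195000000, u'\\Microsoft\\Windows\\CurrentVersion\\Run',
#       regvalue, offset=0, registry_type=u'SOFTWARE')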
+56
View File
@@ -0,0 +1,56 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains an import statement for each filter."""
import logging
from plaso.filters import dynamic_filter
from plaso.filters import eventfilter
from plaso.filters import filterlist
from plaso.lib import filter_interface
from plaso.lib import errors
def ListFilters():
"""Generate a list of all available filters."""
filters = []
for filter_class in filter_interface.FilterObject.classes.itervalues():
filters.append(filter_class())
return filters
def GetFilter(filter_string):
"""Returns the first filter that matches the filter string.
Args:
filter_string: A filter string for any of the available filters.
Returns:
The first FilterObject found matching the filter string. If no FilterObject
is available for this filter string None is returned.
"""
if not filter_string:
return
for filter_obj in ListFilters():
try:
filter_obj.CompileFilter(filter_string)
return filter_obj
except errors.WrongPlugin:
logging.debug(u'Filterstring [{}] is not a filter: {}'.format(
filter_string, filter_obj.filter_name))
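# Illustrative usage sketch (not part of the original module): resolving a
# filter string to the first filter implementation that can compile it.
# The event_objects list is an assumption for the example.
#
#   filter_object = GetFilter(u'message contains "evil"')
#   if filter_object:
#     matches = [event_object for event_object in event_objects
#                if filter_object.Match(event_object)]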
+162
View File
@@ -0,0 +1,162 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains definition for a selective fields EventObjectFilter."""
from plaso.lib import errors
from plaso.lib import lexer
from plaso.filters import eventfilter
class SelectiveLexer(lexer.Lexer):
"""A simple selective filter lexer implementation."""
tokens = [
lexer.Token('INITIAL', r'SELECT', '', 'FIELDS'),
lexer.Token('FIELDS', r'(.+) WHERE ', 'SetFields', 'FILTER'),
lexer.Token('FIELDS', r'(.+) LIMIT', 'SetFields', 'LIMIT_END'),
lexer.Token('FIELDS', r'(.+) SEPARATED BY', 'SetFields', 'SEPARATE'),
lexer.Token('FIELDS', r'(.+)$', 'SetFields', 'END'),
lexer.Token('FILTER', r'(.+) SEPARATED BY', 'SetFilter', 'SEPARATE'),
lexer.Token('FILTER', r'(.+) LIMIT', 'SetFilter', 'LIMIT_END'),
lexer.Token('FILTER', r'(.+)$', 'SetFilter', 'END'),
lexer.Token('SEPARATE', r' ', '', ''), # Ignore white space here.
lexer.Token('SEPARATE', r'LIMIT', '', 'LIMIT_END'),
lexer.Token(
'SEPARATE', r'[\'"]([^ \'"]+)[\'"] LIMIT', 'SetSeparator',
'LIMIT_END'),
lexer.Token(
'SEPARATE', r'[\'"]([^ \'"]+)[\'"]$', 'SetSeparator', 'END'),
lexer.Token(
'SEPARATE', r'(.+)$', 'SetSeparator', 'END'),
lexer.Token(
'LIMIT_END', r'SEPARATED BY [\'"]([^\'"]+)[\'"]', 'SetSeparator', ''),
lexer.Token('LIMIT_END', r'(.+) SEPARATED BY', 'SetLimit', 'SEPARATE'),
lexer.Token('LIMIT_END', r'(.+)$', 'SetLimit', 'END')]
def __init__(self, data=''):
"""Initialize the lexer."""
self.fields = []
self.limit = 0
self.lex_filter = None
self.separator = u','
super(SelectiveLexer, self).__init__(data)
def SetFilter(self, match, **_):
"""Set the filter query."""
filter_match = match.group(1)
if 'LIMIT' in filter_match:
# This only occurs in the case where we have "LIMIT X SEPARATED BY".
self.lex_filter, _, push_back = filter_match.rpartition('LIMIT')
self.PushBack('LIMIT {} SEPARATED BY '.format(push_back))
else:
self.lex_filter = filter_match
def SetSeparator(self, match, **_):
"""Set the separator of the output, only uses the first char."""
separator = match.group(1)
if separator:
self.separator = separator[0]
def SetLimit(self, match, **_):
"""Set the row limit."""
try:
limit = int(match.group(1))
except ValueError:
self.Error('Invalid limit value, should be int [{}] = {}'.format(
type(match.group(1)), match.group(1)))
limit = 0
self.limit = limit
def SetFields(self, match, **_):
"""Set the selective fields."""
text = match.group(1).lower()
field_text, _, _ = text.partition(' from ')
use_field_text = field_text.replace(' ', '')
if ',' in use_field_text:
self.fields = use_field_text.split(',')
else:
self.fields = [use_field_text]
class DynamicFilter(eventfilter.EventObjectFilter):
"""A twist to the EventObjectFilter allowing output fields to be selected.
This filter is essentially the same as the EventObjectFilter except it wraps
it in a selection of which fields should be included by an output module that
has support for selective fields. That is to say the filter:
SELECT field_a, field_b WHERE attribute contains 'text'
Will use the EventObjectFilter "attribute contains 'text'" and at the same
time indicate to the appropriate output module that the user wants only the
fields field_a and field_b to be used in the output.
"""
@property
def fields(self):
"""Set the fields property."""
return self._fields
@property
def limit(self):
"""Return the limit of row counts."""
return self._limit
@property
def separator(self):
"""Return the separator value."""
return self._separator
def __init__(self):
"""Initialize the selective EventObjectFilter."""
super(DynamicFilter, self).__init__()
self._fields = []
self._limit = 0
self._separator = u','
def CompileFilter(self, filter_string):
"""Compile the filter string into a EventObjectFilter matcher."""
lex = SelectiveLexer(filter_string)
_ = lex.NextToken()
if lex.error:
raise errors.WrongPlugin('Malformed filter string.')
_ = lex.NextToken()
if lex.error:
raise errors.WrongPlugin('No fields defined.')
while lex.state != 'END':
_ = lex.NextToken()
if lex.error:
raise errors.WrongPlugin('No filter defined for DynamicFilter.')
if lex.state != 'END':
raise errors.WrongPlugin(
'Malformed DynamicFilter, end state not reached.')
self._fields = lex.fields
self._limit = lex.limit
self._separator = unicode(lex.separator)
if lex.lex_filter:
super(DynamicFilter, self).CompileFilter(lex.lex_filter)
else:
self.matcher = None
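# Illustrative usage sketch (not part of the original module): compiling a
# selective filter and reading the parsed selection back.
#
#   dynamic_filter = DynamicFilter()
#   dynamic_filter.CompileFilter(
#       u'SELECT date, message WHERE message contains "dude" LIMIT 10')
#   # dynamic_filter.fields == [u'date', u'message']
#   # dynamic_filter.limit == 10
#   # dynamic_filter.separator == u','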
+85
View File
@@ -0,0 +1,85 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the DynamicFilter filter."""
import unittest
from plaso.filters import dynamic_filter
from plaso.filters import test_helper
class DynamicFilterTest(test_helper.FilterTestHelper):
"""Tests for the DynamicFilter filter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self.test_filter = dynamic_filter.DynamicFilter()
def testFilterFail(self):
"""Run few tests that should not be a proper filter."""
self.TestFail('/tmp/file_that_most_likely_does_not_exist')
self.TestFail('some random stuff that is destined to fail')
self.TestFail('some_stuff is "random" and other_stuff ')
self.TestFail('some_stuff is "random" and other_stuff is not "random"')
self.TestFail('SELECT stuff FROM machine WHERE conditions are met')
self.TestFail('SELECT field_a, field_b WHERE ')
self.TestFail('SELECT field_a, field_b SEPARATED BY')
self.TestFail('SELECT field_a, SEPARATED BY field_b WHERE ')
self.TestFail('SELECT field_a, field_b LIMIT WHERE')
def testFilterApprove(self):
self.TestTrue('SELECT stuff FROM machine WHERE some_stuff is "random"')
self.TestTrue('SELECT field_a, field_b, field_c')
self.TestTrue('SELECT field_a, field_b, field_c SEPARATED BY "%"')
self.TestTrue('SELECT field_a, field_b, field_c LIMIT 10')
self.TestTrue('SELECT field_a, field_b, field_c LIMIT 10 SEPARATED BY "|"')
self.TestTrue('SELECT field_a, field_b, field_c SEPARATED BY "|" LIMIT 10')
self.TestTrue('SELECT field_a, field_b, field_c WHERE date > "2012"')
self.TestTrue(
'SELECT field_a, field_b, field_c WHERE date > "2012" LIMIT 100')
self.TestTrue((
'SELECT field_a, field_b, field_c WHERE date > "2012" SEPARATED BY "@"'
' LIMIT 100'))
self.TestTrue((
'SELECT parser, date, time WHERE some_stuff is "random" and '
'date < "2021-02-14 14:51:23"'))
def testFilterFields(self):
query = 'SELECT stuff FROM machine WHERE some_stuff is "random"'
self.test_filter.CompileFilter(query)
self.assertEquals(['stuff'], self.test_filter.fields)
query = 'SELECT stuff, a, b, date FROM machine WHERE some_stuff is "random"'
self.test_filter.CompileFilter(query)
self.assertEquals(['stuff', 'a', 'b', 'date'], self.test_filter.fields)
query = 'SELECT date, message, zone, hostname WHERE some_stuff is "random"'
self.test_filter.CompileFilter(query)
self.assertEquals(['date', 'message', 'zone', 'hostname'],
self.test_filter.fields)
query = 'SELECT hlutir'
self.test_filter.CompileFilter(query)
self.assertEquals(['hlutir'], self.test_filter.fields)
query = 'SELECT hlutir LIMIT 10'
self.test_filter.CompileFilter(query)
self.assertEquals(['hlutir'], self.test_filter.fields)
self.assertEquals(10, self.test_filter.limit)
if __name__ == '__main__':
unittest.main()
+40
View File
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains definition for a simple filter."""
from plaso.lib import errors
from plaso.lib import filter_interface
from plaso.lib import pfilter
class EventObjectFilter(filter_interface.FilterObject):
"""A simple filter using the objectfilter library."""
def CompileFilter(self, filter_string):
"""Compile the filter string into a filter matcher."""
self.matcher = pfilter.GetMatcher(filter_string, True)
if not self.matcher:
raise errors.WrongPlugin('Malformed filter string.')
def Match(self, event_object):
"""Evaluate an EventObject against a filter."""
if not self.matcher:
return True
self._decision = self.matcher.Matches(event_object)
return self._decision
+43
View File
@@ -0,0 +1,43 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the EventObjectFilter filter."""
import unittest
from plaso.filters import test_helper
from plaso.filters import eventfilter
class EventObjectFilterTest(test_helper.FilterTestHelper):
"""Tests for the EventObjectFilter filter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self.test_filter = eventfilter.EventObjectFilter()
def testFilterFail(self):
"""Run few tests that should not be a proper filter."""
self.TestFail('SELECT stuff FROM machine WHERE conditions are met')
self.TestFail('/tmp/file_that_most_likely_does_not_exist')
self.TestFail('some random stuff that is destined to fail')
self.TestFail('some_stuff is "random" and other_stuff ')
def testFilterApprove(self):
self.TestTrue('some_stuff is "random" and other_stuff is not "random"')
if __name__ == '__main__':
unittest.main()
+109
View File
@@ -0,0 +1,109 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains definition for a list of ObjectFilter."""
import os
import yaml
import logging
from plaso.lib import errors
from plaso.lib import filter_interface
from plaso.lib import pfilter
def IncludeKeyword(loader, node):
"""A constructor for the include keyword in YAML."""
filename = loader.construct_scalar(node)
if os.path.isfile(filename):
with open(filename, 'rb') as fh:
try:
data = yaml.safe_load(fh)
except yaml.YAMLError as exception:
logging.error(u'Unable to load rule file with error: {0:s}'.format(
exception))
return None
return data
class ObjectFilterList(filter_interface.FilterObject):
"""A series of Pfilter filters along with metadata."""
def CompileFilter(self, filter_string):
"""Compile a set of ObjectFilters defined in an YAML file."""
if not os.path.isfile(filter_string):
raise errors.WrongPlugin((
'ObjectFilterList requires a YAML file to be passed on, this filter '
'string is not a file.'))
yaml.add_constructor('!include', IncludeKeyword,
Loader=yaml.loader.SafeLoader)
results = None
with open(filter_string, 'rb') as fh:
try:
results = yaml.safe_load(fh)
except (yaml.scanner.ScannerError, IOError) as exception:
raise errors.WrongPlugin(
u'Unable to parse YAML file with error: {0:s}.'.format(exception))
self.filters = []
if type(results) is dict:
self._ParseEntry(results)
elif type(results) is list:
for result in results:
if type(result) is not dict:
raise errors.WrongPlugin(
u'Wrong format of YAML file, entry not a dict ({})'.format(
type(result)))
self._ParseEntry(result)
else:
raise errors.WrongPlugin(
u'Wrong format of YAML file, top level not a dict or list ({})'.format(
type(results)))
def _ParseEntry(self, entry):
"""Parse a single YAML filter entry."""
# A single file with a list of filters to parse.
for name, meta in entry.items():
if 'filter' not in meta:
raise errors.WrongPlugin(
u'Entry inside {} does not contain a filter statement.'.format(
name))
matcher = pfilter.GetMatcher(meta.get('filter'), True)
if not matcher:
raise errors.WrongPlugin(
u'Filter entry [{0:s}] malformed for rule: <{1:s}>'.format(
meta.get('filter'), name))
self.filters.append((name, matcher, meta))
def Match(self, event_object):
"""Evaluate an EventObject against a pfilter."""
if not self.filters:
return True
for name, matcher, meta in self.filters:
self._decision = matcher.Matches(event_object)
if self._decision:
self._reason = u'[{}] {} {}'.format(
name, meta.get('description', 'N/A'), u' - '.join(
meta.get('urls', [])))
return True
return False
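# Illustrative sketch (not part of the original module): a rule file for
# ObjectFilterList. Every top level entry names a rule and must carry a
# "filter" statement; the !include constructor registered above can splice
# in the contents of another YAML rule file. The file name is made up for
# the example.
#
#   Evil_Dude:
#     description: Find evil stuff
#     urls: [mbl.is]
#     filter: message contains "evil"
#   More_Rules: !include extra_rules.yaml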
+98
View File
@@ -0,0 +1,98 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the PFilters filter."""
import os
import logging
import tempfile
import unittest
from plaso.filters import filterlist
from plaso.filters import test_helper
class ObjectFilterTest(test_helper.FilterTestHelper):
"""Tests for the ObjectFilterList filter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self.test_filter = filterlist.ObjectFilterList()
def testFilterFail(self):
"""Run few tests that should not be a proper filter."""
self.TestFail('SELECT stuff FROM machine WHERE conditions are met')
self.TestFail('/tmp/file_that_most_likely_does_not_exist')
self.TestFail('some random stuff that is destined to fail')
self.TestFail('some_stuff is "random" and other_stuff ')
self.TestFail('some_stuff is "random" and other_stuff is not "random"')
def CreateFileAndTest(self, content):
"""Creates a file and then runs the test."""
name = ''
with tempfile.NamedTemporaryFile(delete=False) as file_object:
name = file_object.name
file_object.write(content)
self.TestTrue(name)
try:
os.remove(name)
except (OSError, IOError) as exception:
logging.warning(
u'Unable to remove temporary file: {0:s} with error: {1:s}'.format(
name, exception))
def testFilterApprove(self):
one_rule = u'\n'.join([
u'Again_Dude:',
u' description: Heavy artillery caught on fire',
u' case_nr: 62345',
u' analysts: [anonymous]',
u' urls: [cnn.com,microsoft.com]',
u' filter: message contains "dude where is my car"'])
self.CreateFileAndTest(one_rule)
collection = u'\n'.join([
u'Rule_Dude:',
u' description: This is the very case I talk about, a lot',
u' case_nr: 1235',
u' analysts: [dude, jack, horn]',
u' urls: [mbl.is,visir.is]',
(u' filter: date > "2012-01-01 10:54:13" and parser not contains '
u'"evtx"'),
u'',
u'Again_Dude:',
u' description: Heavy artillery caught on fire',
u' case_nr: 62345',
u' analysts: [smith, perry, john]',
u' urls: [cnn.com,microsoft.com]',
u' filter: message contains "dude where is my car"',
u'',
u'Third_Rule_Of_Thumb:',
u' description: Another ticket for another day.',
u' case_nr: 234',
u' analysts: [joe]',
u' urls: [mbl.is,symantec.com/whereevillies,virustotal.com/myhash]',
u' filter: evil_bit is 1'])
self.CreateFileAndTest(collection)
if __name__ == '__main__':
unittest.main()
+50
View File
@@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains helper function and classes for filters."""
import unittest
from plaso.lib import errors
class FilterTestHelper(unittest.TestCase):
"""A simple class that provides helper functions for testing filters."""
def setUp(self):
"""This should be overwritten."""
self.test_filter = None
def TestTrue(self, query):
"""A quick test that should compile into a valid filter."""
if not self.test_filter:
self.fail(u'No test filter defined.')
try:
self.test_filter.CompileFilter(query)
except errors.WrongPlugin:
# Let the test fail.
self.fail(u'Unable to compile filter: {0:s}'.format(query))
def TestFail(self, query):
"""A quick failure test with a filter."""
if not self.test_filter:
self.fail(u'No test filter defined.')
with self.assertRaises(errors.WrongPlugin):
self.test_filter.CompileFilter(query)
+86
View File
@@ -0,0 +1,86 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains an import statement for each formatter."""
from plaso.formatters import android_app_usage
from plaso.formatters import android_calls
from plaso.formatters import android_sms
from plaso.formatters import appcompatcache
from plaso.formatters import appusage
from plaso.formatters import asl
from plaso.formatters import bencode_parser
from plaso.formatters import bsm
from plaso.formatters import chrome
from plaso.formatters import chrome_cache
from plaso.formatters import chrome_cookies
from plaso.formatters import chrome_extension_activity
from plaso.formatters import cups_ipp
from plaso.formatters import filestat
from plaso.formatters import firefox
from plaso.formatters import firefox_cache
from plaso.formatters import firefox_cookies
from plaso.formatters import ganalytics
from plaso.formatters import gdrive
from plaso.formatters import hachoir
from plaso.formatters import iis
from plaso.formatters import ipod
from plaso.formatters import java_idx
from plaso.formatters import ls_quarantine
from plaso.formatters import mac_appfirewall
from plaso.formatters import mac_document_versions
from plaso.formatters import mac_keychain
from plaso.formatters import mac_securityd
from plaso.formatters import mac_wifi
from plaso.formatters import mackeeper_cache
from plaso.formatters import mactime
from plaso.formatters import mcafeeav
from plaso.formatters import msie_webcache
from plaso.formatters import msiecf
from plaso.formatters import olecf
from plaso.formatters import opera
from plaso.formatters import oxml
from plaso.formatters import pcap
from plaso.formatters import plist
from plaso.formatters import popcontest
from plaso.formatters import pls_recall
from plaso.formatters import recycler
from plaso.formatters import rubanetra
from plaso.formatters import safari
from plaso.formatters import selinux
from plaso.formatters import shell_items
from plaso.formatters import skydrivelog
from plaso.formatters import skydrivelogerr
from plaso.formatters import skype
from plaso.formatters import symantec
from plaso.formatters import syslog
from plaso.formatters import task_scheduler
from plaso.formatters import text
from plaso.formatters import utmp
from plaso.formatters import utmpx
from plaso.formatters import windows
from plaso.formatters import winevt
from plaso.formatters import winevtx
from plaso.formatters import winfirewall
from plaso.formatters import winjob
from plaso.formatters import winlnk
from plaso.formatters import winprefetch
from plaso.formatters import winreg
from plaso.formatters import winregservice
from plaso.formatters import xchatlog
from plaso.formatters import xchatscrollback
from plaso.formatters import zeitgeist
+33
View File
@@ -0,0 +1,33 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Android Application Usage."""
from plaso.formatters import interface
class AndroidApplicationFormatter(interface.ConditionalEventFormatter):
"""Formatter for an Application Last Resumed event."""
DATA_TYPE = 'android:event:last_resume_time'
FORMAT_STRING_PIECES = [
u'Package: {package}',
u'Component: {component}']
SOURCE_LONG = 'Android App Usage'
SOURCE_SHORT = 'LOG'
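# Illustrative sketch (not part of the original module): conditional
# formatters only render the FORMAT_STRING_PIECES whose attributes exist on
# the event object, so an event that carries "package" but no "component"
# would be rendered without the component piece. The event object and its
# attribute value are assumptions for the example.
#
#   formatter = AndroidApplicationFormatter()
#   messages = formatter.GetMessages(event_object)
#   # long message: u'Package: com.example.app'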
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Android contacts2.db database events."""
from plaso.formatters import interface
class AndroidCallFormatter(interface.ConditionalEventFormatter):
"""Formatter for Android call history events."""
DATA_TYPE = 'android:event:call'
FORMAT_STRING_PIECES = [
u'{call_type}',
u'Number: {number}',
u'Name: {name}',
u'Duration: {duration} seconds']
FORMAT_STRING_SHORT_PIECES = [u'{call_type} Call']
SOURCE_LONG = 'Android Call History'
SOURCE_SHORT = 'LOG'
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Android mmssms.db database events."""
from plaso.formatters import interface
class AndroidSmsFormatter(interface.ConditionalEventFormatter):
"""Formatter for Android sms events."""
DATA_TYPE = 'android:messaging:sms'
FORMAT_STRING_PIECES = [
u'Type: {sms_type}',
u'Address: {address}',
u'Status: {sms_read}',
u'Message: {body}']
FORMAT_STRING_SHORT_PIECES = [u'{body}']
SOURCE_LONG = 'Android SMS messages'
SOURCE_SHORT = 'SMS'
+36
View File
@@ -0,0 +1,36 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the AppCompatCache entries inside the Windows Registry."""
from plaso.formatters import interface
class AppCompatCacheFormatter(interface.ConditionalEventFormatter):
"""Formatter for an AppCompatCache Windows Registry entry."""
DATA_TYPE = 'windows:registry:appcompatcache'
FORMAT_STRING_PIECES = [
u'[{keyname}]',
u'Cached entry: {entry_index}',
u'Path: {path}']
FORMAT_STRING_SHORT_PIECES = [u'Path: {path}']
SOURCE_LONG = 'AppCompatCache Registry Entry'
SOURCE_SHORT = 'REG'
+33
View File
@@ -0,0 +1,33 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Mac OS X application usage."""
from plaso.formatters import interface
class ApplicationUsageFormatter(interface.EventFormatter):
"""Define the formatting for Application Usage information."""
DATA_TYPE = 'macosx:application_usage'
FORMAT_STRING = (u'{application} v.{app_version} (bundle: {bundle_id}).'
' Launched: {count} time(s)')
FORMAT_STRING_SHORT = u'{application} ({count} time(s))'
SOURCE_LONG = 'Application Usage'
SOURCE_SHORT = 'LOG'
+47
View File
@@ -0,0 +1,47 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Apple System Log binary files."""
from plaso.formatters import interface
class AslFormatter(interface.ConditionalEventFormatter):
"""Formatter for an ASL log entry."""
DATA_TYPE = 'mac:asl:event'
FORMAT_STRING_PIECES = [
u'MessageID: {message_id}',
u'Level: {level}',
u'User ID: {user_sid}',
u'Group ID: {group_id}',
u'Read User: {read_uid}',
u'Read Group: {read_gid}',
u'Host: {computer_name}',
u'Sender: {sender}',
u'Facility: {facility}',
u'Message: {message}',
u'{extra_information}']
FORMAT_STRING_SHORT_PIECES = [
u'Host: {host}',
u'Sender: {sender}',
u'Facility: {facility}']
SOURCE_LONG = 'ASL entry'
SOURCE_SHORT = 'LOG'
+49
View File
@@ -0,0 +1,49 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for bencode parser events."""
from plaso.formatters import interface
class uTorrentFormatter(interface.ConditionalEventFormatter):
"""Formatter for a BitTorrent uTorrent active torrents."""
DATA_TYPE = 'p2p:bittorrent:utorrent'
SOURCE_LONG = 'uTorrent Active Torrents'
SOURCE_SHORT = 'TORRENT'
FORMAT_STRING_SEPARATOR = u'; '
FORMAT_STRING_PIECES = [u'Torrent {caption}',
u'Saved to {path}',
u'Minutes seeded: {seedtime}']
class TransmissionFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Transmission active torrents."""
DATA_TYPE = 'p2p:bittorrent:transmission'
SOURCE_LONG = 'Transmission Active Torrents'
SOURCE_SHORT = 'TORRENT'
FORMAT_STRING_SEPARATOR = u'; '
FORMAT_STRING_PIECES = [u'Saved to {destination}',
u'Minutes seeded: {seedtime}']
+54
View File
@@ -0,0 +1,54 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Basic Security Module binary files."""
from plaso.formatters import interface
class MacBSMFormatter(interface.ConditionalEventFormatter):
"""Formatter for an BSM log entry."""
DATA_TYPE = 'mac:bsm:event'
FORMAT_STRING_PIECES = [
u'Type: {event_type}',
u'Return: {return_value}',
u'Information: {extra_tokens}']
FORMAT_STRING_SHORT_PIECES = [
u'Type: {event_type}',
u'Return: {return_value}']
SOURCE_LONG = 'BSM entry'
SOURCE_SHORT = 'LOG'
class BSMFormatter(interface.ConditionalEventFormatter):
"""Formatter for an BSM log entry."""
DATA_TYPE = 'bsm:event'
FORMAT_STRING_PIECES = [
u'Type: {event_type}',
u'Information: {extra_tokens}']
FORMAT_STRING_SHORT_PIECES = [
u'Type: {event_type}']
SOURCE_LONG = 'BSM entry'
SOURCE_SHORT = 'LOG'
+61
View File
@@ -0,0 +1,61 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Google Chrome history."""
from plaso.formatters import interface
class ChromePageVisitedFormatter(interface.ConditionalEventFormatter):
"""The event formatter for page visited data in Chrome History."""
DATA_TYPE = 'chrome:history:page_visited'
FORMAT_STRING_PIECES = [
u'{url}',
u'({title})',
u'[count: {typed_count}]',
u'Host: {host}',
u'Visit from: {from_visit}',
u'Visit Source: [{visit_source}]',
u'{extra}']
FORMAT_STRING_SHORT_PIECES = [
u'{url}',
u'({title})']
SOURCE_LONG = 'Chrome History'
SOURCE_SHORT = 'WEBHIST'
class ChromeFileDownloadFormatter(interface.ConditionalEventFormatter):
"""The event formatter for file downloaded data in Chrome History."""
DATA_TYPE = 'chrome:history:file_downloaded'
FORMAT_STRING_PIECES = [
u'{url}',
u'({full_path}).',
u'Received: {received_bytes} bytes',
u'out of: {total_bytes} bytes.']
FORMAT_STRING_SHORT_PIECES = [
u'{full_path} downloaded',
u'({received_bytes} bytes)']
SOURCE_LONG = 'Chrome History'
SOURCE_SHORT = 'WEBHIST'
+32
View File
@@ -0,0 +1,32 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Chrome Cache files based-events."""
from plaso.formatters import interface
class ChromeCacheEntryEventFormatter(interface.ConditionalEventFormatter):
"""Class contains the Chrome Cache Entry event formatter."""
DATA_TYPE = 'chrome:cache:entry'
FORMAT_STRING_PIECES = [
u'Original URL: {original_url}']
SOURCE_LONG = 'Chrome Cache'
SOURCE_SHORT = 'WEBHIST'
+40
View File
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Google Chrome cookie."""
from plaso.formatters import interface
class ChromeCookieFormatter(interface.ConditionalEventFormatter):
"""The event formatter for cookie data in Chrome Cookies database."""
DATA_TYPE = 'chrome:cookie:entry'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Flags:',
u'[HTTP only] = {httponly}',
u'[Persistent] = {persistent}']
FORMAT_STRING_SHORT_PIECES = [
u'{host}',
u'({cookie_name})']
SOURCE_LONG = 'Chrome Cookies'
SOURCE_SHORT = 'WEBHIST'
@@ -0,0 +1,47 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Google extension activity database events."""
from plaso.formatters import interface
class ChromeExtensionActivityEventFormatter(
interface.ConditionalEventFormatter):
"""The event formatter for Chrome extension activity log entries."""
DATA_TYPE = 'chrome:extension_activity:activity_log'
FORMAT_STRING_PIECES = [
u'Chrome extension: {extension_id}',
u'Action type: {action_type}',
u'Activity identifier: {activity_id}',
u'Page URL: {page_url}',
u'Page title: {page_title}',
u'API name: {api_name}',
u'Args: {args}',
u'Other: {other}']
FORMAT_STRING_SHORT_PIECES = [
u'{extension_id}',
u'{api_name}',
u'{args}']
SOURCE_LONG = 'Chrome Extension Activity'
SOURCE_SHORT = 'WEBHIST'
# TODO: add action_type string representation.
+42
View File
@@ -0,0 +1,42 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for CUPS IPP file."""
from plaso.formatters import interface
class CupsIppFormatter(interface.ConditionalEventFormatter):
"""Formatter for CUPS IPP file."""
DATA_TYPE = 'cups:ipp:event'
FORMAT_STRING_PIECES = [
u'Status: {status}',
u'User: {user}',
u'Owner: {owner}',
u'Job Name: {job_name}',
u'Application: {application}',
u'Document type: {type_doc}',
u'Printer: {printer_id}']
FORMAT_STRING_SHORT_PIECES = [
u'Status: {status}',
u'Job Name: {job_name}']
SOURCE_LONG = 'CUPS IPP Log'
SOURCE_SHORT = 'LOG'
+66
View File
@@ -0,0 +1,66 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Stat object of a PFile."""
from plaso.lib import errors
from plaso.formatters import interface
class PfileStatFormatter(interface.ConditionalEventFormatter):
"""Define the formatting for PFileStat."""
DATA_TYPE = 'fs:stat'
FORMAT_STRING_PIECES = [u'{display_name}',
u'({unallocated})']
FORMAT_STRING_SHORT_PIECES = [u'{filename}']
SOURCE_SHORT = 'FILE'
def GetSources(self, event_object):
"""Return a list of source short and long messages."""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
self.source_string = u'{0:s} {1:s}'.format(
getattr(event_object, 'fs_type', u'Unknown FS'),
getattr(event_object, 'timestamp_desc', u'Time'))
return super(PfileStatFormatter, self).GetSources(event_object)
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
if not getattr(event_object, 'allocated', True):
event_object.unallocated = u'unallocated'
return super(PfileStatFormatter, self).GetMessages(event_object)
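Both overrides above hinge on optional event attributes. A stand-alone sketch of the getattr-with-default pattern they use (FakeEvent is a hypothetical stand-in for a plaso fs:stat event, not plaso code):

class FakeEvent(object):
  """Hypothetical stand-in for a plaso fs:stat event."""
  def __init__(self, **kwargs):
    for key, value in kwargs.items():
      setattr(self, key, value)

def BuildSourceString(event):
  # Mirrors GetSources: fall back to defaults when attributes are absent.
  return u'{0:s} {1:s}'.format(
      getattr(event, 'fs_type', u'Unknown FS'),
      getattr(event, 'timestamp_desc', u'Time'))

event = FakeEvent(fs_type=u'NTFS', allocated=False)
assert BuildSourceString(event) == u'NTFS Time'

# GetMessages only sets 'unallocated' when the entry is not allocated,
# so the '({unallocated})' piece only appears for unallocated files.
if not getattr(event, 'allocated', True):
  event.unallocated = u'unallocated'
assert getattr(event, 'unallocated', None) == u'unallocated'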
+136
View File
@@ -0,0 +1,136 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Mozilla Firefox history."""
from plaso.lib import errors
from plaso.formatters import interface
class FirefoxBookmarkAnnotationFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Firefox places.sqlite bookmark annotation."""
DATA_TYPE = 'firefox:places:bookmark_annotation'
FORMAT_STRING_PIECES = [
u'Bookmark Annotation: [{content}]',
u'to bookmark [{title}]',
u'({url})']
FORMAT_STRING_SHORT_PIECES = [u'Bookmark Annotation: {title}']
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
class FirefoxBookmarkFolderFormatter(interface.EventFormatter):
"""Formatter for a Firefox places.sqlite bookmark folder."""
DATA_TYPE = 'firefox:places:bookmark_folder'
FORMAT_STRING = u'{title}'
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
class FirefoxBookmarkFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Firefox places.sqlite URL bookmark."""
DATA_TYPE = 'firefox:places:bookmark'
FORMAT_STRING_PIECES = [
u'Bookmark {type}',
u'{title}',
u'({url})',
u'[{places_title}]',
u'visit count {visit_count}']
FORMAT_STRING_SHORT_PIECES = [
u'Bookmarked {title}',
u'({url})']
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
class FirefoxPageVisitFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Firefox places.sqlite page visited."""
DATA_TYPE = 'firefox:places:page_visited'
# Transitions defined in the Firefox source file:
# src/toolkit/components/places/nsINavHistoryService.idl
# which also explains what each of these transition types means.
_URL_TRANSITIONS = {
1: 'LINK',
2: 'TYPED',
3: 'BOOKMARK',
4: 'EMBED',
5: 'REDIRECT_PERMANENT',
6: 'REDIRECT_TEMPORARY',
7: 'DOWNLOAD',
8: 'FRAMED_LINK',
}
_URL_TRANSITIONS.setdefault('UNKNOWN')
# TODO: Make extra conditional formatting.
FORMAT_STRING_PIECES = [
u'{url}',
u'({title})',
u'[count: {visit_count}]',
u'Host: {host}',
u'{extra_string}']
FORMAT_STRING_SHORT_PIECES = [u'URL: {url}']
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
def GetMessages(self, event_object):
"""Return the message strings."""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
transition = self._URL_TRANSITIONS.get(
getattr(event_object, 'visit_type', 0), None)
if transition:
transition_str = u'Transition: {0!s}'.format(transition)
if hasattr(event_object, 'extra'):
if transition:
event_object.extra.append(transition_str)
event_object.extra_string = u' '.join(event_object.extra)
elif transition:
event_object.extra_string = transition_str
return super(FirefoxPageVisitFormatter, self).GetMessages(event_object)
class FirefoxDownloadFormatter(interface.EventFormatter):
"""Formatter for a Firefox downloads.sqlite download."""
DATA_TYPE = 'firefox:downloads:download'
FORMAT_STRING = (u'{url} ({full_path}). Received: {received_bytes} bytes '
u'out of: {total_bytes} bytes.')
FORMAT_STRING_SHORT = u'{full_path} downloaded ({received_bytes} bytes)'
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
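GetMessages above turns the integer visit_type column into a transition label and folds it into extra_string. A stand-alone sketch of that lookup (the event handling is simplified; the values come from the table above):

URL_TRANSITIONS = {
    1: 'LINK', 2: 'TYPED', 3: 'BOOKMARK', 4: 'EMBED',
    5: 'REDIRECT_PERMANENT', 6: 'REDIRECT_TEMPORARY',
    7: 'DOWNLOAD', 8: 'FRAMED_LINK'}

def DescribeTransition(visit_type, extra=None):
  """Returns the extra_string the formatter would produce."""
  transition = URL_TRANSITIONS.get(visit_type, None)
  parts = list(extra or [])
  if transition:
    parts.append(u'Transition: {0!s}'.format(transition))
  return u' '.join(parts)

assert DescribeTransition(2) == u'Transition: TYPED'
assert DescribeTransition(99) == u''  # Unknown types add nothing.
assert DescribeTransition(7, [u'(from Firefox)']) == (
    u'(from Firefox) Transition: DOWNLOAD')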
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Firefox cache records."""
from plaso.formatters import interface
class FirefoxCacheFormatter(interface.ConditionalEventFormatter):
"""Formatter for Firefox cache record."""
DATA_TYPE = 'firefox:cache:record'
FORMAT_STRING_PIECES = [
u'Fetched {fetch_count} time(s)',
u'[{response_code}]',
u'{request_method}',
u'"{url}"']
FORMAT_STRING_SHORT_PIECES = [
u'[{response_code}]',
u'{request_method}',
u'"{url}"']
SOURCE_LONG = 'Firefox Cache'
SOURCE_SHORT = 'WEBHIST'
+40
View File
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Firefox cookie."""
from plaso.formatters import interface
class FirefoxCookieFormatter(interface.ConditionalEventFormatter):
"""The event formatter for cookie data in Firefox Cookies database."""
DATA_TYPE = 'firefox:cookie:entry'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Flags:',
u'[HTTP only]: {httponly}',
u'(GA analysis: {ga_data})']
FORMAT_STRING_SHORT_PIECES = [
u'{host}',
u'({cookie_name})']
SOURCE_LONG = 'Firefox Cookies'
SOURCE_SHORT = 'WEBHIST'
+70
View File
@@ -0,0 +1,70 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Google Analytics cookie."""
from plaso.formatters import interface
class AnalyticsUtmaCookieFormatter(interface.ConditionalEventFormatter):
"""The event formatter for UTMA Google Analytics cookie."""
DATA_TYPE = 'cookie:google:analytics:utma'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Sessions: {sessions}',
u'Domain Hash: {domain_hash}',
u'Visitor ID: {visitor_id}']
FORMAT_STRING_SHORT_PIECES = [
u'{url}',
u'({cookie_name})']
SOURCE_LONG = 'Google Analytics Cookies'
SOURCE_SHORT = 'WEBHIST'
class AnalyticsUtmbCookieFormatter(AnalyticsUtmaCookieFormatter):
"""The event formatter for UTMB Google Analytics cookie."""
DATA_TYPE = 'cookie:google:analytics:utmb'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Pages Viewed: {pages_viewed}',
u'Domain Hash: {domain_hash}']
class AnalyticsUtmzCookieFormatter(AnalyticsUtmaCookieFormatter):
"""The event formatter for UTMZ Google Analytics cookie."""
DATA_TYPE = 'cookie:google:analytics:utmz'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Sessions: {sessions}',
u'Domain Hash: {domain_hash}',
u'Sources: {sources}',
u'Last source used to access: {utmcsr}',
u'Ad campaign information: {utmccn}',
u'Last type of visit: {utmcmd}',
u'Keywords used to find site: {utmctr}',
u'Path to the page of referring link: {utmcct}']
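The UTMB and UTMZ formatters above subclass the UTMA formatter, overriding only DATA_TYPE and the long pieces while inheriting the short pieces and source strings. A stand-alone sketch of the pattern (the classes below are illustrative, not plaso's):

class BaseCookieFormatter(object):
  DATA_TYPE = 'cookie:example:base'
  FORMAT_STRING_PIECES = [u'{url}', u'({cookie_name})']
  SOURCE_LONG = 'Example Cookies'
  SOURCE_SHORT = 'WEBHIST'

class DerivedCookieFormatter(BaseCookieFormatter):
  # Only the identifier and the long pieces change; everything else is
  # inherited from the base class.
  DATA_TYPE = 'cookie:example:derived'
  FORMAT_STRING_PIECES = [u'{url}', u'Pages Viewed: {pages_viewed}']

assert DerivedCookieFormatter.SOURCE_LONG == 'Example Cookies'
assert DerivedCookieFormatter.DATA_TYPE == 'cookie:example:derived'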
+55
View File
@@ -0,0 +1,55 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Google Drive snaphots."""
from plaso.formatters import interface
__author__ = 'David Nides (david.nides@gmail.com)'
class GDriveCloudEntryFormatter(interface.ConditionalEventFormatter):
"""Formatter for Google Drive snapshot cloud entry."""
DATA_TYPE = 'gdrive:snapshot:cloud_entry'
FORMAT_STRING_PIECES = [
u'File Path: {path}',
u'[{shared}]',
u'Size: {size}',
u'URL: {url}',
u'Type: {document_type}']
FORMAT_STRING_SHORT_PIECES = [u'{path}']
SOURCE_LONG = 'Google Drive (cloud entry)'
SOURCE_SHORT = 'LOG'
class GDriveLocalEntryFormatter(interface.ConditionalEventFormatter):
"""Formatter for Google Drive snapshot local entry."""
DATA_TYPE = 'gdrive:snapshot:local_entry'
FORMAT_STRING_PIECES = [
u'File Path: {path}',
u'Size: {size}']
FORMAT_STRING_SHORT_PIECES = [u'{path}']
SOURCE_LONG = 'Google Drive (local entry)'
SOURCE_SHORT = 'LOG'
+57
View File
@@ -0,0 +1,57 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Hachoir events."""
from plaso.lib import errors
from plaso.formatters import interface
__author__ = 'David Nides (david.nides@gmail.com)'
class HachoirFormatter(interface.EventFormatter):
"""Formatter for Hachoir based events."""
DATA_TYPE = 'metadata:hachoir'
FORMAT_STRING = u'{data}'
SOURCE_LONG = 'Hachoir Metadata'
SOURCE_SHORT = 'META'
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
string_parts = []
for key, value in sorted(event_object.metadata.items()):
string_parts.append(u'{0:s}: {1!s}'.format(key, value))
event_object.data = u' '.join(string_parts)
return super(HachoirFormatter, self).GetMessages(event_object)
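GetMessages above flattens the metadata dictionary into one sorted 'key: value' string before delegating to the base class. A stand-alone sketch (the metadata dict is made up; the !s conversion keeps non-string values safe):

metadata = {u'producer': u'hachoir', u'author': u'jane', u'pages': 3}

string_parts = []
for key, value in sorted(metadata.items()):
  # Sorting keeps the output deterministic; !s stringifies the int.
  string_parts.append(u'{0:s}: {1!s}'.format(key, value))
data = u' '.join(string_parts)

assert data == u'author: jane pages: 3 producer: hachoir'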
+59
View File
@@ -0,0 +1,59 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Windows IIS log files."""
from plaso.formatters import interface
__author__ = 'Ashley Holtz (ashley.a.holtz@gmail.com)'
class WinIISFormatter(interface.ConditionalEventFormatter):
"""A formatter for Windows IIS log entries."""
DATA_TYPE = 'iis:log:line'
FORMAT_STRING_PIECES = [
u'{http_method}',
u'{requested_uri_stem}',
u'[',
u'{source_ip}',
u'>',
u'{dest_ip}',
u':',
u'{dest_port}',
u']',
u'Http Status: {http_status}',
u'Bytes Sent: {sent_bytes}',
u'Bytes Received: {received_bytes}',
u'User Agent: {user_agent}',
u'Protocol Version: {protocol_version}',]
FORMAT_STRING_SHORT_PIECES = [
u'{http_method}',
u'{requested_uri_stem}',
u'[',
u'{source_ip}',
u'>',
u'{dest_ip}',
u':',
u'{dest_port}',
u']',]
SOURCE_LONG = 'IIS Log'
SOURCE_SHORT = 'LOG'
+244
View File
@@ -0,0 +1,244 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the event formatters interface classes."""
import re
from plaso.lib import errors
from plaso.lib import registry
class EventFormatter(object):
"""Base class to format event type specific data using a format string.
Define the (long) format string and the short format string by defining
FORMAT_STRING and FORMAT_STRING_SHORT. The syntax of the format strings
is similar to that of format() where the placeholder for a certain
event object attribute is defined as {attribute_name}.
"""
__metaclass__ = registry.MetaclassRegistry
__abstract = True
# The data type is a unique identifier for the event data. The current
# approach is to define it as human readable string in the format
# root:branch: ... :leaf, e.g. a page visited entry inside a Chrome History
# database is defined as: chrome:history:page_visited.
DATA_TYPE = u'internal'
# The format string.
FORMAT_STRING = u''
FORMAT_STRING_SHORT = u''
# The source short and long strings.
SOURCE_SHORT = u'LOG'
SOURCE_LONG = u''
def __init__(self):
"""Set up the formatter and determine if this is the right formatter."""
# Forcing the format string to be unicode to make sure we don't
# try to format it as an ASCII string.
self.format_string = unicode(self.FORMAT_STRING)
self.format_string_short = unicode(self.FORMAT_STRING_SHORT)
self.source_string = unicode(self.SOURCE_LONG)
self.source_string_short = unicode(self.SOURCE_SHORT)
def GetMessages(self, event_object):
"""Return a list of messages extracted from an event object.
The l2t_csv and other formats are dependent on a message field,
referred to as description_long and description_short in l2t_csv.
Plaso does not store this field explicitly; it only stores a format
string and the appropriate attributes.
This method takes the format string and converts that back into a
formatted string that can be used for display.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
Raises:
WrongFormatter: if the event object cannot be formatted by the formatter.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
event_values = event_object.GetValues()
try:
msg = self.format_string.format(**event_values)
except KeyError as exception:
msgs = []
msgs.append(u'Format error: [{0:s}] for: <{1:s}>'.format(
exception, self.format_string))
for attr, value in event_object.GetValues().iteritems():
msgs.append(u'{0}: {1}'.format(attr, value))
msg = u' '.join(msgs)
# Strip carriage return and linefeed from the message strings.
# Using replace function here because it is faster
# than re.sub() or string.strip().
msg = msg.replace('\r', u'').replace('\n', u'')
if not self.format_string_short:
msg_short = msg
else:
try:
msg_short = self.format_string_short.format(**event_values)
# Using replace function here because it is faster
# than re.sub() or string.strip().
msg_short = msg_short.replace('\r', u'').replace('\n', u'')
except KeyError:
msg_short = u'Unable to format short message string: {0:s}'.format(
self.format_string_short)
# Truncate the short message string if necessary.
if len(msg_short) > 80:
msg_short = u'{0:s}...'.format(msg_short[0:77])
return msg, msg_short
def GetSources(self, event_object):
"""Return a list containing source short and long."""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
return self.source_string_short, self.source_string
class ConditionalEventFormatter(EventFormatter):
"""Base class to conditionally format event data using format string pieces.
Define the (long) format string and the short format string by defining
FORMAT_STRING_PIECES and FORMAT_STRING_SHORT_PIECES. The syntax of the
format string pieces is similar to that of the event formatter
(EventFormatter). Every format string piece should contain a single
attribute name or none.
FORMAT_STRING_SEPARATOR is used to control the string with which the
separate string pieces are joined. It contains a space by default.
"""
__abstract = True
# The format string pieces.
FORMAT_STRING_PIECES = [u'']
FORMAT_STRING_SHORT_PIECES = [u'']
# The separator used to join the string pieces.
FORMAT_STRING_SEPARATOR = u' '
def __init__(self):
"""Initializes the conditional formatter.
A map is built of the string pieces and their corresponding attribute
name to optimize conditional string formatting.
Raises:
RuntimeError: when an invalid format string piece is encountered.
"""
super(ConditionalEventFormatter, self).__init__()
# The format string can be defined as:
# {name}, {name:format}, {name!conversion}, {name!conversion:format}
regexp = re.compile('{[a-z][a-zA-Z0-9_]*[!]?[^:}]*[:]?[^}]*}')
regexp_name = re.compile('[a-z][a-zA-Z0-9_]*')
# The format string pieces map is a list containing the attribute name
# per format string piece. E.g. ["Description: {description}"] would be
# mapped to: [0] = "description". If the string piece does not contain
# an attribute name it is treated as text that does not need formatting.
self._format_string_pieces_map = []
for format_string_piece in self.FORMAT_STRING_PIECES:
result = regexp.findall(format_string_piece)
if not result:
# A text-only format string piece is stored as an empty map entry to
# keep the map indices aligned with the format string pieces.
self._format_string_pieces_map.append('')
elif len(result) == 1:
# Extract the attribute name.
attribute_name = regexp_name.findall(result[0])[0]
self._format_string_pieces_map.append(attribute_name)
else:
raise RuntimeError((
u'Invalid format string piece: [{0:s}] contains more than 1 '
u'attribute name.').format(format_string_piece))
self._format_string_short_pieces_map = []
for format_string_piece in self.FORMAT_STRING_SHORT_PIECES:
result = regexp.findall(format_string_piece)
if not result:
# A text-only format string piece is stored as an empty map entry to
# keep the map indices aligned with the format string pieces.
self._format_string_short_pieces_map.append('')
elif len(result) == 1:
# Extract the attribute name.
attribute_name = regexp_name.findall(result[0])[0]
self._format_string_short_pieces_map.append(attribute_name)
else:
raise RuntimeError((
u'Invalid short format string piece: [{0:s}] contains more '
u'than 1 attribute name.').format(format_string_piece))
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
# Using getattr here to make sure the attribute is not set to None.
# If A.b is None, hasattr(A, 'b') is True but getattr(A, 'b', None) is falsy.
string_pieces = []
for map_index, attribute_name in enumerate(self._format_string_pieces_map):
if not attribute_name or hasattr(event_object, attribute_name):
if attribute_name:
attribute = getattr(event_object, attribute_name, None)
# If an attribute is an int, yet has zero value we want to include
# that in the format string, since that is still potentially valid
# information. Otherwise we would like to skip it.
if type(attribute) not in (bool, int, long, float) and not attribute:
continue
string_pieces.append(self.FORMAT_STRING_PIECES[map_index])
self.format_string = unicode(
self.FORMAT_STRING_SEPARATOR.join(string_pieces))
string_pieces = []
for map_index, attribute_name in enumerate(
self._format_string_short_pieces_map):
if not attribute_name or getattr(event_object, attribute_name, None):
string_pieces.append(self.FORMAT_STRING_SHORT_PIECES[map_index])
self.format_string_short = unicode(
self.FORMAT_STRING_SEPARATOR.join(string_pieces))
return super(ConditionalEventFormatter, self).GetMessages(event_object)
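The GetMessages docstring above covers three behaviors worth a compact illustration: normal substitution, the KeyError fallback that dumps the event values, and the 80-character cap on the short message. A stand-alone sketch (simplified; not the plaso classes themselves):

def FormatMessage(format_string, event_values):
  """Returns the message, falling back to a dump of the values."""
  try:
    msg = format_string.format(**event_values)
  except KeyError as exception:
    parts = [u'Format error: [{0!s}] for: <{1:s}>'.format(
        exception, format_string)]
    for key, value in sorted(event_values.items()):
      parts.append(u'{0!s}: {1!s}'.format(key, value))
    msg = u' '.join(parts)
  # Strip carriage returns and linefeeds, as GetMessages does.
  return msg.replace('\r', u'').replace('\n', u'')

def TruncateShort(msg_short):
  # Short messages are capped at 80 characters with an ellipsis.
  if len(msg_short) > 80:
    return u'{0:s}...'.format(msg_short[0:77])
  return msg_short

assert FormatMessage(u'{url}', {'url': u'http://a\n'}) == u'http://a'
assert FormatMessage(u'{url}', {}).startswith(u'Format error:')
assert len(TruncateShort(u'x' * 100)) == 80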
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the iPod device events."""
from plaso.formatters import interface
class IPodDeviceFormatter(interface.ConditionalEventFormatter):
"""Formatter for iPod device events."""
DATA_TYPE = 'ipod:device:entry'
FORMAT_STRING_PIECES = [
u'Device ID: {device_id}',
u'Type: {device_class}',
u'[{family_id}]',
u'Connected {use_count} times',
u'Serial nr: {serial_number}',
u'IMEI [{imei}]']
SOURCE_LONG = 'iPod Connections'
SOURCE_SHORT = 'LOG'
+34
View File
@@ -0,0 +1,34 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Java Cache IDX events."""
from plaso.formatters import interface
class JavaIDXFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Java Cache IDX download item."""
DATA_TYPE = 'java:download:idx'
SOURCE_LONG = 'Java Cache IDX'
SOURCE_SHORT = 'JAVA_IDX'
FORMAT_STRING_PIECES = [
u'IDX Version: {idx_version}',
u'Host IP address: ({ip_address})',
u'Download URL: {url}']
+36
View File
@@ -0,0 +1,36 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Mac OS X launch services quarantine events."""
from plaso.formatters import interface
class LSQuarantineFormatter(interface.ConditionalEventFormatter):
"""Formatter for a LS Quarantine history event."""
DATA_TYPE = 'macosx:lsquarantine'
FORMAT_STRING_PIECES = [
u'[{agent}]',
u'Downloaded: {url}',
u'<{data}>']
FORMAT_STRING_SHORT_PIECES = [u'{url}']
SOURCE_LONG = 'LS Quarantine Event'
SOURCE_SHORT = 'LOG'
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Mac appfirewall.log file."""
from plaso.formatters import interface
class MacAppFirewallLogFormatter(interface.ConditionalEventFormatter):
"""Formatter for Mac appfirewall.log file."""
DATA_TYPE = 'mac:asl:appfirewall:line'
FORMAT_STRING_PIECES = [
u'Computer: {computer_name}',
u'Agent: {agent}',
u'Status: {status}',
u'Process name: {process_name}',
u'Log: {action}']
FORMAT_STRING_SHORT_PIECES = [
u'Process name: {process_name}',
u'Status: {status}']
SOURCE_LONG = 'Mac AppFirewall Log'
SOURCE_SHORT = 'LOG'
+38
View File
@@ -0,0 +1,38 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Mac OS X Document Versions files."""
from plaso.formatters import interface
class MacDocumentVersionsFormatter(interface.ConditionalEventFormatter):
"""The event formatter for page visited data in Document Versions."""
DATA_TYPE = 'mac:document_versions:file'
FORMAT_STRING_PIECES = [
u'Version of [{name}]',
u'({path})',
u'stored in {version_path}',
u'by {user_sid}']
FORMAT_STRING_SHORT_PIECES = [
u'Stored a document version of [{name}]']
SOURCE_LONG = 'Document Versions'
SOURCE_SHORT = 'HISTORY'
+53
View File
@@ -0,0 +1,53 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Keychain password database file."""
from plaso.formatters import interface
class KeychainApplicationRecordFormatter(interface.ConditionalEventFormatter):
"""Formatter for keychain application record event."""
DATA_TYPE = 'mac:keychain:application'
FORMAT_STRING_PIECES = [
u'Name: {entry_name}',
u'Account: {account_name}']
FORMAT_STRING_SHORT_PIECES = [u'{entry_name}']
SOURCE_LONG = 'Keychain Application password'
SOURCE_SHORT = 'LOG'
class KeychainInternetRecordFormatter(interface.ConditionalEventFormatter):
"""Formatter for keychain internet record event."""
DATA_TYPE = 'mac:keychain:internet'
FORMAT_STRING_PIECES = [
u'Name: {entry_name}',
u'Account: {account_name}',
u'Where: {where}',
u'Protocol: {protocol}',
u'({type_protocol})']
FORMAT_STRING_SHORT_PIECES = [u'{entry_name}']
SOURCE_LONG = 'Keychain Internet password'
SOURCE_SHORT = 'LOG'
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for ASL securityd log file."""
from plaso.formatters import interface
class MacSecuritydLogFormatter(interface.ConditionalEventFormatter):
"""Formatter for ASL Securityd file."""
DATA_TYPE = 'mac:asl:securityd:line'
FORMAT_STRING_PIECES = [
u'Sender: {sender}',
u'({sender_pid})',
u'Level: {level}',
u'Facility: {facility}',
u'Text: {message}']
FORMAT_STRING_SHORT_PIECES = [u'Text: {message}']
SOURCE_LONG = 'Mac ASL Securityd Log'
SOURCE_SHORT = 'LOG'
+38
View File
@@ -0,0 +1,38 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Mac wifi.log file."""
from plaso.formatters import interface
class MacWifiLogFormatter(interface.ConditionalEventFormatter):
"""Formatter for Mac Wifi file."""
DATA_TYPE = 'mac:wifilog:line'
FORMAT_STRING_PIECES = [
u'Action: {action}',
u'Agent: {user}',
u'({function})',
u'Log: {text}']
FORMAT_STRING_SHORT_PIECES = [
u'Action: {action}']
SOURCE_LONG = 'Mac Wifi Log'
SOURCE_SHORT = 'LOG'
+35
View File
@@ -0,0 +1,35 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a MacKeepr Cache formatter in plaso."""
from plaso.formatters import interface
class MacKeeperCacheFormatter(interface.ConditionalEventFormatter):
"""Formatter for MacKeeper Cache extracted events."""
DATA_TYPE = 'mackeeper:cache'
FORMAT_STRING_PIECES = [
u'{description}', u'<{event_type}>', u':', u'{text}', u'[',
u'URL: {url}', u'Event ID: {record_id}', u'Room: {room}', u']']
FORMAT_STRING_SHORT_PIECES = [u'<{event_type}>', u'{text}']
SOURCE_LONG = 'MacKeeper Cache'
SOURCE_SHORT = 'LOG'
+32
View File
@@ -0,0 +1,32 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Sleuthkit (TSK) bodyfile or mactime format."""
from plaso.formatters import interface
class MactimeFormatter(interface.EventFormatter):
"""Class that formats mactime bodyfile events."""
DATA_TYPE = 'fs:mactime:line'
# The format string.
FORMAT_STRING = u'{filename}'
SOURCE_LONG = 'Mactime Bodyfile'
SOURCE_SHORT = 'FILE'
+140
View File
@@ -0,0 +1,140 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the event formatters manager class."""
import logging
from plaso.formatters import interface
from plaso.lib import utils
class DefaultFormatter(interface.EventFormatter):
"""Default formatter for events that do not have any defined formatter."""
DATA_TYPE = u'event'
FORMAT_STRING = u'<WARNING DEFAULT FORMATTER> Attributes: {attribute_driven}'
FORMAT_STRING_SHORT = u'<DEFAULT> {attribute_driven}'
def GetMessages(self, event_object):
"""Return a list of messages extracted from an event object."""
text_pieces = []
for key, value in event_object.GetValues().items():
if key in utils.RESERVED_VARIABLES:
continue
text_pieces.append(u'{0:s}: {1!s}'.format(key, value))
event_object.attribute_driven = u' '.join(text_pieces)
# The default formatter requires the data_type to be set to 'event',
# otherwise it will complain and refuse to process the event.
# TODO: Change this behavior and allow the default formatter to accept
# arbitrary data types (as it should).
old_data_type = getattr(event_object, 'data_type', None)
event_object.data_type = self.DATA_TYPE
msg, msg_short = super(DefaultFormatter, self).GetMessages(event_object)
event_object.data_type = old_data_type
return msg, msg_short
class EventFormatterManager(object):
"""Class to manage the event formatters."""
@classmethod
def GetFormatter(cls, event_object):
"""Retrieves the formatter for a specific event object.
This function builds a map of data types and the corresponding event
formatters. At the moment this map is only built once.
Args:
event_object: The event object (EventObject) which is used to identify
the formatter.
Returns:
The corresponding formatter (EventFormatter) if available or None.
Raises:
RuntimeError: if a duplicate event formatter is found while building
the map of event formatters.
"""
if not hasattr(cls, 'event_formatters'):
cls.event_formatters = {}
cls.default_formatter = DefaultFormatter()
for cls_formatter in interface.EventFormatter.classes:
try:
formatter = interface.EventFormatter.classes[cls_formatter]()
# Raise on duplicate formatters.
if formatter.DATA_TYPE in cls.event_formatters:
raise RuntimeError((
u'event formatter for data type: {0:s} defined in: {1:s} and '
u'{2:s}.').format(
formatter.DATA_TYPE, cls_formatter,
cls.event_formatters[
formatter.DATA_TYPE].__class__.__name__))
cls.event_formatters[formatter.DATA_TYPE] = formatter
except RuntimeError as exception:
# Ignore broken formatters.
logging.warning(u'{0:s}'.format(exception))
cls.event_formatters.setdefault(None)
if event_object.data_type in cls.event_formatters:
return cls.event_formatters[event_object.data_type]
else:
logging.warning(
u'Using default formatter for data type: {0:s}'.format(
event_object.data_type))
return cls.default_formatter
@classmethod
def GetMessageStrings(cls, event_object):
"""Retrieves the formatted message strings for a specific event object.
Args:
event_object: The event object (EventObject) which is used to identify
the formatter.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
formatter = cls.GetFormatter(event_object)
if not formatter:
return u'', u''
return formatter.GetMessages(event_object)
@classmethod
def GetSourceStrings(cls, event_object):
"""Retrieves the formatted source long and short strings for an event.
Args:
event_object: The event object (EventObject) which is used to identify
the formatter.
Returns:
A list that contains the source_short and source_long version of the
event.
"""
# TODO: change this to return the long variant first so it is consistent
# with GetMessageStrings.
formatter = cls.GetFormatter(event_object)
if not formatter:
return u'', u''
return formatter.GetSources(event_object)
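GetFormatter above lazily builds a data_type to formatter map, raises on duplicates and falls back to the default formatter for unknown types. A stand-alone sketch of that dispatch (simplified; no metaclass registry):

class _Formatter(object):
  def __init__(self, data_type):
    self.DATA_TYPE = data_type

_DEFAULT = _Formatter(u'event')
_FORMATTERS = {}

def RegisterFormatter(formatter):
  if formatter.DATA_TYPE in _FORMATTERS:
    # Mirrors the RuntimeError raised on duplicate data types.
    raise RuntimeError(
        u'event formatter for data type: {0:s} already defined.'.format(
            formatter.DATA_TYPE))
  _FORMATTERS[formatter.DATA_TYPE] = formatter

def GetFormatter(data_type):
  # Unknown data types fall back to the default formatter.
  return _FORMATTERS.get(data_type, _DEFAULT)

RegisterFormatter(_Formatter(u'fs:stat'))
assert GetFormatter(u'fs:stat').DATA_TYPE == u'fs:stat'
assert GetFormatter(u'no:such:type') is _DEFAULT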
+163
View File
@@ -0,0 +1,163 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a unit test for the event formatters."""
import unittest
from plaso.formatters import interface
from plaso.formatters import manager
from plaso.formatters import winreg # pylint: disable=unused-import
from plaso.lib import event_test
class TestEvent1Formatter(interface.EventFormatter):
"""Test event 1 formatter."""
DATA_TYPE = 'test:event1'
FORMAT_STRING = u'{text}'
SOURCE_SHORT = 'FILE'
SOURCE_LONG = 'Weird Log File'
class WrongEventFormatter(interface.EventFormatter):
"""A simple event formatter."""
DATA_TYPE = 'test:wrong'
FORMAT_STRING = u'This format string does not match {body}.'
SOURCE_SHORT = 'FILE'
SOURCE_LONG = 'Weird Log File'
class EventFormatterUnitTest(unittest.TestCase):
"""The unit test for the event formatter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self._formatters_manager = manager.EventFormatterManager
self.event_objects = event_test.GetEventObjects()
def GetCSVLine(self, event_object):
"""Takes an EventObject and prints out a simple CSV line from it."""
try:
msg, _ = self._formatters_manager.GetMessageStrings(event_object)
source_short, source_long = self._formatters_manager.GetSourceStrings(
event_object)
except KeyError:
print event_object.GetAttributes()
raise
return u'{0:d},{1:s},{2:s},{3:s}'.format(
event_object.timestamp, source_short, source_long, msg)
def testInitialization(self):
"""Test the initialization."""
self.assertTrue(TestEvent1Formatter())
def testAttributes(self):
"""Test if we can read the event attributes correctly."""
events = {}
for event_object in self.event_objects:
events[self.GetCSVLine(event_object)] = True
self.assertIn((
u'1334961526929596,REG,UNKNOWN key,[MY AutoRun key] Run: '
u'c:/Temp/evil.exe'), events)
self.assertIn(
(u'1334966206929596,REG,UNKNOWN key,[//HKCU/Secret/EvilEmpire/'
u'Malicious_key] Value: send all the exes to the other '
u'world'), events)
self.assertIn((u'1334940286000000,REG,UNKNOWN key,[//HKCU/Windows'
u'/Normal] Value: run all the benign stuff'), events)
self.assertIn((u'1335781787929596,FILE,Weird Log File,This log line reads '
u'ohh so much.'), events)
self.assertIn((u'1335781787929596,FILE,Weird Log File,Nothing of interest'
u' here, move on.'), events)
self.assertIn((u'1335791207939596,FILE,Weird Log File,Mr. Evil just logged'
u' into the machine and got root.'), events)
def testTextBasedEvent(self):
"""Test a text based event."""
for event_object in self.event_objects:
source_short, _ = self._formatters_manager.GetSourceStrings(event_object)
if source_short == 'LOG':
msg, msg_short = self._formatters_manager.GetMessageStrings(
event_object)
self.assertEquals(msg, (
u'This is a line by someone not reading the log line properly. And '
u'since this log line exceeds the accepted 80 chars it will be '
u'shortened.'))
self.assertEquals(msg_short, (
u'This is a line by someone not reading the log line properly. '
u'And since this l...'))
class ConditionalTestEvent1(event_test.TestEvent1):
DATA_TYPE = 'test:conditional_event1'
class ConditionalTestEvent1Formatter(interface.ConditionalEventFormatter):
"""Test event 1 conditional (event) formatter."""
DATA_TYPE = 'test:conditional_event1'
FORMAT_STRING_PIECES = [
u'Description: {description}',
u'Comment',
u'Value: 0x{numeric:02x}',
u'Optional: {optional}',
u'Text: {text}']
SOURCE_SHORT = 'LOG'
SOURCE_LONG = 'Some Text File.'
class BrokenConditionalEventFormatter(interface.ConditionalEventFormatter):
"""A broken conditional event formatter."""
DATA_TYPE = 'test:broken_conditional'
FORMAT_STRING_PIECES = [u'{too} {many} formatting placeholders']
SOURCE_SHORT = 'LOG'
SOURCE_LONG = 'Some Text File.'
class ConditionalEventFormatterUnitTest(unittest.TestCase):
"""The unit test for the conditional event formatter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self.event_object = ConditionalTestEvent1(1335791207939596, {
'numeric': 12, 'description': 'this is beyond words',
'text': 'but we\'re still trying to say something about the event'})
def testInitialization(self):
"""Test the initialization."""
self.assertTrue(ConditionalTestEvent1Formatter())
with self.assertRaises(RuntimeError):
BrokenConditionalEventFormatter()
def testGetMessages(self):
"""Test get messages."""
event_formatter = ConditionalTestEvent1Formatter()
msg, _ = event_formatter.GetMessages(self.event_object)
expected_msg = (
u'Description: this is beyond words Comment Value: 0x0c '
u'Text: but we\'re still trying to say something about the event')
self.assertEquals(msg, expected_msg)
if __name__ == '__main__':
unittest.main()
+34
View File
@@ -0,0 +1,34 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the McAfee AV Logs files."""
from plaso.formatters import interface
class McafeeAccessProtectionLogEventFormatter(interface.EventFormatter):
"""Class that formats the McAfee Access Protection Log events."""
DATA_TYPE = 'av:mcafee:accessprotectionlog'
# The format string.
FORMAT_STRING = (u'File Name: {filename} User: {username} {trigger_location} '
u'{status} {rule} {action}')
FORMAT_STRING_SHORT = u'{filename} {action}'
SOURCE_LONG = 'McAfee Access Protection Log'
SOURCE_SHORT = 'LOG'
+99
View File
@@ -0,0 +1,99 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatters for the MSIE WebCache ESE database events."""
from plaso.formatters import interface
class MsieWebCacheContainerEventFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIE WebCache ESE database Container_# table record."""
DATA_TYPE = 'msie:webcache:container'
FORMAT_STRING_PIECES = [
u'Entry identifier: {entry_identifier}',
u'Container identifier: {container_identifier}',
u'Cache identifier: {cache_identifier}',
u'URL: {url}',
u'Redirect URL: {redirect_url}',
u'Access count: {access_count}',
u'Sync count: {sync_count}',
u'Filename: {cached_filename}',
u'File extension: {file_extension}',
u'Cached file size: {cached_file_size}',
u'Request headers: {request_headers}',
u'Response headers: {response_headers}']
FORMAT_STRING_SHORT_PIECES = [
u'URL: {url}']
SOURCE_LONG = 'MSIE WebCache container record'
SOURCE_SHORT = 'WEBHIST'
class MsieWebCacheContainersEventFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIE WebCache ESE database Containers table record."""
DATA_TYPE = 'msie:webcache:containers'
FORMAT_STRING_PIECES = [
u'Container identifier: {container_identifier}',
u'Set identifier: {set_identifier}',
u'Name: {name}',
u'Directory: {directory}',
u'Table: Container_{container_identifier}']
FORMAT_STRING_SHORT_PIECES = [
u'Directory: {directory}']
SOURCE_LONG = 'MSIE WebCache containers record'
SOURCE_SHORT = 'WEBHIST'
class MsieWebCacheLeakFilesEventFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIE WebCache ESE database LeakFiles table record."""
DATA_TYPE = 'msie:webcache:leak_file'
FORMAT_STRING_PIECES = [
u'Leak identifier: {leak_identifier}',
u'Filename: {cached_filename}']
FORMAT_STRING_SHORT_PIECES = [
u'Filename: {cached_filename}']
SOURCE_LONG = 'MSIE WebCache leak files record'
SOURCE_SHORT = 'WEBHIST'
class MsieWebCachePartitionsEventFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIE WebCache ESE database Partitions table record."""
DATA_TYPE = 'msie:webcache:partitions'
FORMAT_STRING_PIECES = [
u'Partition identifier: {partition_identifier}',
u'Partition type: {partition_type}',
u'Directory: {directory}',
u'Table identifier: {table_identifier}']
FORMAT_STRING_SHORT_PIECES = [
u'Directory: {directory}']
SOURCE_LONG = 'MSIE WebCache partitions record'
SOURCE_SHORT = 'WEBHIST'
+65
View File
@@ -0,0 +1,65 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Microsoft Internet Explorer (MSIE) Cache Files (CF) events."""
from plaso.lib import errors
from plaso.formatters import interface
class MsiecfUrlFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIECF URL item."""
DATA_TYPE = 'msiecf:url'
FORMAT_STRING_PIECES = [
u'Location: {url}',
u'Number of hits: {number_of_hits}',
u'Cached file size: {cached_file_size}',
u'HTTP headers: {http_headers_cleaned}',
u'{recovered_string}']
FORMAT_STRING_SHORT_PIECES = [
u'Location: {url}']
SOURCE_LONG = 'MSIE Cache File URL record'
SOURCE_SHORT = 'WEBHIST'
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
if hasattr(event_object, 'http_headers'):
event_object.http_headers_cleaned = event_object.http_headers.replace(
'\r\n', ' - ')
# TODO: Could this be moved upstream since this is done in other parsers
# as well?
if getattr(event_object, 'recovered', None):
event_object.recovered_string = u'[Recovered Entry]'
return super(MsiecfUrlFormatter, self).GetMessages(event_object)
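The override above derives two display attributes before formatting: multi-line HTTP headers are flattened onto one line and recovered items get a marker. A stand-alone sketch (the header string is made up):

http_headers = u'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n'

# Flatten the multi-line headers the way the formatter does.
http_headers_cleaned = http_headers.replace('\r\n', ' - ')
assert http_headers_cleaned == (
    u'HTTP/1.1 200 OK - Content-Type: text/html - ')

# Recovered (deleted but carved) entries get an explicit marker.
recovered = True
recovered_string = u'[Recovered Entry]' if recovered else u''
assert recovered_string == u'[Recovered Entry]'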
+149
View File
@@ -0,0 +1,149 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatters for OLE Compound File (OLECF) events."""
from plaso.formatters import interface
from plaso.lib import errors
class OleCfItemFormatter(interface.EventFormatter):
"""Formatter for an OLECF item."""
DATA_TYPE = 'olecf:item'
FORMAT_STRING = u'Name: {name}'
FORMAT_STRING_SHORT = u'Name: {name}'
SOURCE_LONG = 'OLECF Item'
SOURCE_SHORT = 'OLECF'
class OleCfDestListEntryFormatter(interface.ConditionalEventFormatter):
"""Formatter for an OLECF DestList stream."""
DATA_TYPE = 'olecf:dest_list:entry'
FORMAT_STRING_PIECES = [
u'Entry: {entry_number}',
u'Pin status: {pin_status_string}',
u'Hostname: {hostname}',
u'Path: {path}',
u'Droid volume identifier: {droid_volume_identifier}',
u'Droid file identifier: {droid_file_identifier}',
u'Birth droid volume identifier: {birth_droid_volume_identifier}',
u'Birth droid file identifier: {birth_droid_file_identifier}']
FORMAT_STRING_SHORT_PIECES = [
u'Entry: {entry_number}',
u'Pin status: {pin_status_string}',
u'Path: {path}']
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
pin_status = getattr(event_object, 'pin_status', None)
if pin_status == 0xffffffff:
event_object.pin_status_string = u'Unpinned'
else:
event_object.pin_status_string = u'Pinned'
return super(OleCfDestListEntryFormatter, self).GetMessages(event_object)
class OleCfDocumentSummaryInfoFormatter(interface.ConditionalEventFormatter):
"""Formatter for an OLECF Summary Info property set stream."""
DATA_TYPE = 'olecf:document_summary_info'
FORMAT_STRING_PIECES = [
u'Number of bytes: {number_of_bytes}',
u'Number of lines: {number_of_lines}',
u'Number of paragraphs: {number_of_paragraphs}',
u'Number of slides: {number_of_slides}',
u'Number of notes: {number_of_notes}',
u'Number of hidden slides: {number_of_hidden_slides}',
u'Number of multi-media clips: {number_of_clips}',
u'Company: {company}',
u'Manager: {manager}',
u'Shared document: {shared_document}',
u'Application version: {application_version}',
u'Content type: {content_type}',
u'Content status: {content_status}',
u'Language: {language}',
u'Document version: {document_version}']
# TODO: add support for the following properties.
# u'Digital signature: {digital_signature}',
FORMAT_STRING_SHORT_PIECES = [
u'Company: {company}']
SOURCE_LONG = 'OLECF Document Summary Info'
SOURCE_SHORT = 'OLECF'
class OleCfSummaryInfoFormatter(interface.ConditionalEventFormatter):
"""Formatter for an OLECF Summary Info property set stream."""
DATA_TYPE = 'olecf:summary_info'
FORMAT_STRING_PIECES = [
u'Title: {title}',
u'Subject: {subject}',
u'Author: {author}',
u'Keywords: {keywords}',
u'Comments: {comments}',
u'Template: {template}',
u'Revision number: {revision_number}',
u'Last saved by: {last_saved_by}',
u'Total edit time: {total_edit_time}',
u'Number of pages: {number_of_pages}',
u'Number of words: {number_of_words}',
u'Number of characters: {number_of_characters}',
u'Application: {application}',
u'Security: {security}']
FORMAT_STRING_SHORT_PIECES = [
u'Title: {title}',
u'Subject: {subject}',
u'Author: {author}',
u'Revision number: {revision_number}']
SOURCE_LONG = 'OLECF Summary Info'
SOURCE_SHORT = 'OLECF'
# TODO: add a function to print the security as a descriptive string.
_SECURITY_VALUES = {
0x00000001: 'Password protected',
0x00000002: 'Read-only recommended',
0x00000004: 'Read-only enforced',
0x00000008: 'Locked for annotations',
}
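# A minimal sketch of the helper the TODO above asks for, assuming the
# security property is a bitmask of the _SECURITY_VALUES flags; this helper
# is hypothetical and not part of the original module.
def _GetSecurityString(security_value):
  """Returns a descriptive string for an OLECF security bitmask."""
  descriptions = [
      description for bitmask, description in sorted(
          OleCfSummaryInfoFormatter._SECURITY_VALUES.items())
      if bitmask & security_value]
  if not descriptions:
    return u'None'
  return u', '.join(descriptions)
# For example, _GetSecurityString(0x00000003) returns
# u'Password protected, Read-only recommended'.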
+47
View File
@@ -0,0 +1,47 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Opera history events."""
from plaso.formatters import interface
class OperaGlobalHistoryFormatter(interface.ConditionalEventFormatter):
"""Formatter for an Opera global history event."""
DATA_TYPE = 'opera:history:entry'
FORMAT_STRING_PIECES = [
u'{url}',
u'({title})',
u'[{description}]']
SOURCE_LONG = 'Opera Browser History'
SOURCE_SHORT = 'WEBHIST'
class OperaTypedHistoryFormatter(interface.ConditionalEventFormatter):
"""Formatter for an Opera typed history event."""
DATA_TYPE = 'opera:history:typed_entry'
FORMAT_STRING_PIECES = [
u'{url}',
u'({entry_selection})']
SOURCE_LONG = 'Opera Browser History'
SOURCE_SHORT = 'WEBHIST'
+67
View File
@@ -0,0 +1,67 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for OpenXML events."""
from plaso.formatters import interface
__author__ = 'David Nides (david.nides@gmail.com)'
class OpenXMLParserFormatter(interface.ConditionalEventFormatter):
"""Formatter for OXML events."""
DATA_TYPE = 'metadata:openxml'
FORMAT_STRING_PIECES = [
u'Creating App: {creating_app}',
u'App version: {app_version}',
u'Title: {title}',
u'Subject: {subject}',
u'Last saved by: {last_saved_by}',
u'Author: {author}',
u'Total edit time (secs): {total_edit_time}',
u'Keywords: {keywords}',
u'Comments: {comments}',
u'Revision Num: {revision_num}',
u'Template: {template}',
u'Num pages: {num_pages}',
u'Num words: {num_words}',
u'Num chars: {num_chars}',
u'Num chars with spaces: {num_chars_w_spaces}',
u'Num lines: {num_lines}',
u'Company: {company}',
u'Manager: {manager}',
u'Shared: {shared}',
u'Security: {security}',
u'Hyperlinks changed: {hyperlinks_changed}',
u'Links up to date: {links_up_to_date}',
u'Scale crop: {scale_crop}',
u'Digital signature: {dig_sig}',
u'Slides: {slides}',
u'Hidden slides: {hidden_slides}',
u'Presentation format: {presentation_format}',
u'MM clips: {mm_clips}',
u'Notes: {notes}']
FORMAT_STRING_SHORT_PIECES = [
u'Title: {title}',
u'Subject: {subject}',
u'Author: {author}']
SOURCE_LONG = 'Open XML Metadata'
SOURCE_SHORT = 'META'
+50
View File
@@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for PCAP files."""
from plaso.formatters import interface
__author__ = 'Dominique Kilman (lexistar97@gmail.com)'
class PCAPFormatter(interface.ConditionalEventFormatter):
"""Define the formatting PCAP record."""
DATA_TYPE = 'metadata:pcap'
FORMAT_STRING_PIECES = [
u'Source IP: {source_ip}',
u'Destination IP: {dest_ip}',
u'Source Port: {source_port}',
u'Destination Port: {dest_port}',
u'Protocol: {protocol}',
u'Type: {stream_type}',
u'Size: {size}',
u'Protocol Data: {protocol_data}',
u'Stream Data: {stream_data}',
u'First Packet ID: {first_packet_id}',
u'Last Packet ID: {last_packet_id}',
u'Packet Count: {packet_count}']
FORMAT_STRING_SHORT_PIECES = [
u'Type: {stream_type}',
u'First Packet ID: {first_packet_id}']
SOURCE_LONG = 'Packet Capture File (pcap)'
SOURCE_SHORT = 'PCAP'
+36
View File
@@ -0,0 +1,36 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for Plist Events."""
from plaso.formatters import interface
class PlistFormatter(interface.ConditionalEventFormatter):
"""Event Formatter for plist keys."""
DATA_TYPE = 'plist:key'
FORMAT_STRING_SEPARATOR = u''
FORMAT_STRING_PIECES = [
u'{root}/',
u'{key}',
u' {desc}']
SOURCE_LONG = 'Plist Entry'
SOURCE_SHORT = 'PLIST'
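# Example rendering under assumed attribute values: with root=u'/DeviceCache',
# key=u'Entry' and desc=u'Paired device', the empty FORMAT_STRING_SEPARATOR
# joins the pieces above into u'/DeviceCache/Entry Paired device'.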
+33
View File
@@ -0,0 +1,33 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for PL-Sql Recall events."""
from plaso.formatters import interface
class PlsRecallFormatter(interface.EventFormatter):
"""Formatter for a for a PL-Sql Recall file container."""
DATA_TYPE = 'PLSRecall:event'
SOURCE_LONG = 'PL-Sql Developer Recall file'
SOURCE_SHORT = 'PLSRecall'
# The format string.
FORMAT_STRING = (u'Sequence #{sequence} User: {username} '
u'Database Name: {database_name} Query: {query}')
FORMAT_STRING_SHORT = u'{sequence} {username} {database_name} {query}'
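# Example rendering with assumed values (sequence=42, username=u'SCOTT',
# database_name=u'ORCL', query=u'SELECT 1 FROM DUAL'):
#   Sequence #42 User: SCOTT Database Name: ORCL Query: SELECT 1 FROM DUAL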
+55
View File
@@ -0,0 +1,55 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Popularity Contest parser events."""
from plaso.formatters import interface
class PopularityContestSessionFormatter(interface.ConditionalEventFormatter):
"""Formatter for Popularity Contest Session information."""
DATA_TYPE = 'popularity_contest:session:event'
FORMAT_STRING_PIECES = [
u'Session {session}',
u'{status}',
u'ID {hostid}',
u'[{details}]']
FORMAT_STRING_SHORT_PIECES = [
u'Session {session}',
u'{status}']
SOURCE_LONG = 'Popularity Contest Session'
SOURCE_SHORT = 'LOG'
class PopularityContestLogFormatter(interface.ConditionalEventFormatter):
"""Formatter for Popularity Contest Log events."""
DATA_TYPE = 'popularity_contest:log:event'
FORMAT_STRING_PIECES = [
u'mru [{mru}]',
u'package [{package}]',
u'tag [{record_tag}]']
FORMAT_STRING_SHORT_PIECES = [u'{mru}']
SOURCE_LONG = 'Popularity Contest Log'
SOURCE_SHORT = 'LOG'
+82
View File
@@ -0,0 +1,82 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Windows recycle files."""
from plaso.lib import errors
from plaso.formatters import interface
class WinRecyclerFormatter(interface.ConditionalEventFormatter):
"""Formatter for Windows recycle bin events."""
DATA_TYPE = 'windows:metadata:deleted_item'
DRIVE_LIST = {
0x00: 'A',
0x01: 'B',
0x02: 'C',
0x03: 'D',
0x04: 'E',
0x05: 'F',
0x06: 'G',
0x07: 'H',
0x08: 'I',
0x09: 'J',
0x0A: 'K',
0x0B: 'L',
0x0C: 'M',
0x0D: 'N',
0x0E: 'O',
0x0F: 'P',
0x10: 'Q',
0x11: 'R',
0x12: 'S',
0x13: 'T',
0x14: 'U',
0x15: 'V',
0x16: 'W',
0x17: 'X',
0x18: 'Y',
0x19: 'Z',
}
# The format string.
FORMAT_STRING_PIECES = [
u'DC{index} ->',
u'{orig_filename}',
u'[{orig_filename_legacy}]',
u'(from drive {drive_letter})']
FORMAT_STRING_SHORT_PIECES = [
u'Deleted file: {orig_filename}']
SOURCE_LONG = 'Recycle Bin'
SOURCE_SHORT = 'RECBIN'
def GetMessages(self, event_object):
"""Return the message strings."""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
if hasattr(event_object, 'drive_number'):
event_object.drive_letter = self.DRIVE_LIST.get(
event_object.drive_number, 'C?')
return super(WinRecyclerFormatter, self).GetMessages(event_object)
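# Illustrative sanity check of the mapping above; these asserts mirror the
# class constants and are not part of the original module.
if __name__ == '__main__':
  assert WinRecyclerFormatter.DRIVE_LIST.get(0x02, 'C?') == 'C'
  assert WinRecyclerFormatter.DRIVE_LIST.get(0x1A, 'C?') == 'C?'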
+422
View File
@@ -0,0 +1,422 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains formatters for the parsed Rubanetra events. Additionally, a Java Instant formatter was defined
as well."""
from plaso.formatters import interface
__author__ = 'Stefan Swerk (stefan_rubanetra@swerk.priv.at)'
class RubanetraBaseActivityFormatter(interface.ConditionalEventFormatter):
""" Formatter for a Rubanetra BaseActivity """
DATA_TYPE = 'java:rubanetra:base_activity'
SOURCE_SHORT = 'LOG'
SOURCE_LONG = 'at.jku.fim.rubanetra.BaseActivity'
FORMAT_STRING_PIECES = [
u'activityType: \'{activity_type}\'',
u'firstTimestamp: \'{first_timestamp}\'',
u'lastTimestamp: \'{last_timestamp}\'',
u'description: \'{description}\'',
u'sourceAddress: \'{source_address}\'',
u'destinationAddress: \'{destination_address}\'',
u'compoundFrameNumbers: \'{compound_frame_number_list}\'',
u'isReplaced: \'{replaced}\'',
u'optionalFields: \'{optional_field_dict}\'']
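# The subclasses below extend these base pieces by list concatenation, so
# every activity type repeats the common fields before adding its
# protocol-specific ones.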
class RubanetraPcapActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:pcap_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.PcapActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES \
+ [u'totalSize: \'{pcap_total_size}\'',
u'frameNumber: \'{pcap_frame_number}\'',
u'wireLength: \'{pcap_packet_wirelen}\'',
u'headerCount: \'{pcap_header_count}\'']
class RubanetraHttpRequestActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:http_request_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.HttpRequestActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'serverAddress: \'{server_address}\'',
u'clientAddress: \'{client_address}\'',
u'httpVersion: \'{http_version}\'',
u'httpMethod: \'{http_method}\'',
u'httpQueryString: \'{http_query_string}\'',
u'httpQueryParameters: \'{http_query_parameters}\'',
u'httpRequestHeader: \'{http_request_header_dict}\'',
u'url: \'{url}\'',
u'originalHttpHeader: \'{orig_http_header}\'',
u'contentType: \'{content_type}\'',
u'isResponse: \'{is_response}\'',
u'JNetPcapHttpString: \'{jnetpcap_http_string}\'']
class RubanetraHttpResponseActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:http_response_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.HttpResponseActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'httpVersion: \'{http_version}\'',
u'httpStatusCode: \'{response_status_code}\'',
u'httpStatusLine: \'{response_status_line}\'',
u'httpResponseHeader: \'{response_header_dict}\'',
u'originalHttpHeader: \'{orig_http_header}\'',
u'contentType: \'{content_type}\'',
u'JNetPcapHttpString: \'{jnetpcap_http_string}\'']
class RubanetraDnsActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:dns_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.DnsActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'questionRecords: \'{question_record_list}\'',
u'answerRecords: \'{answer_record_list}\'',
u'authorityRecords: \'{authority_record_list}\'',
u'additionalRecords: \'{additional_record_list}\'',
u'dnsMessageHeader: \'{dns_message_header}\'',
u'isResponse: \'{is_response_bool}\'']
class RubanetraHttpImageActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:http_image_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.HttpImageActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'imageType: \'{image_type}\'',
u'imagePath: \'{image_path}\'']
class RubanetraArpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:arp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.ArpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'hardwareType: \'{hardware_type}\'',
u'protocolType: \'{protocol_type}\'',
u'hardwareAddressLength: \'{hardware_address_length}\'',
u'protocolAddressLength: \'{protocol_address_length}\'',
u'senderHardwareAddress: \'{sender_mac_address}\'',
u'targetHardwareAddress: \'{target_mac_address}\'',
u'senderProtocolAddress: \'{sender_protocol_address}\'',
u'targetProtocolAddress: \'{target_protocol_address}\'',
u'JNetPcapArpString: \'{jnetpcap_arp}\'']
class RubanetraDhcpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:dhcp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.DhcpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'dhcpMessage: \'{dhcp_message}\'']
class RubanetraEthernetActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ethernet_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.EthernetActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'sourceMacAddress: \'{source_mac_address}\'',
u'destinationMacAddress: \'{destination_mac_address}\'',
u'ethernetType: \'{ethernet_type}\'',
u'ethernetTypeEnum: \'{ethernet_type_enum}\'',
u'JNetPcapEthernetString: \'{jnetpcap_ethernet}\'']
class RubanetraFtpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ftp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.FtpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'ftpActivityType: \'{ftp_type}\'',
u'command: \'{command}\'',
u'reply: \'{reply}\'',
u'list: \'{list}\'']
class RubanetraIcmpv4ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:icmpv4_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Icmpv4Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'icmpSubType: \'{icmp_subtype}\'',
u'icmpPacket: \'{icmp_packet}\'',
u'icmpMessage: \'{icmp_message}\'',
u'icmpType: \'{icmp_type}\'',
u'icmpCode: \'{icmp_code}\'',
u'sourceAddress: \'{source_address}\'',
u'destinationAddress: \'{destination_address}\'',
u'identifier: \'{identifier}\'',
u'sequence: \'{sequence}\'',
u'JNetPcapIcmpString: \'{jnetpcap_icmp}\'']
class RubanetraIcmpv6ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:icmpv6_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Icmpv6Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'icmpSubType: \'{icmp_subtype}\'',
u'icmpPacket: \'{icmp_packet}\'',
u'icmpMessage: \'{icmp_message}\'',
u'icmpType: \'{icmp_type}\'',
u'JNetPcapIcmpString: \'{jnetpcap_icmp}\'']
class RubanetraIpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ip_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.IpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'version: \'{version}\'',
u'protocol: \'{protocol}\'',
u'sourceAddress: \'{source_address}\'',
u'destinationAddress: \'{destination_address}\'']
class RubanetraIpv4ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ipv4_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Ipv4Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'internetHeaderLength: \'{internet_header_length}\'',
u'differentiatedServicesCodePoint: \'{differentiated_services_code_point}\'',
u'totalLength: \'{total_length}\'',
u'identification: \'{identification}\'',
u'flags: \'{flags}\'',
u'fragmentOffset: \'{fragment_offset}\'',
u'timeToLive: \'{time_to_live}\'',
u'headerChecksum: \'{header_checksum}\'',
u'options: \'{options}\'',
u'JNetPcapIpv4String: \'{jnetpcap_ip4}\'']
class RubanetraIpv6ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ipv6_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Ipv6Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'trafficClass: \'{traffic_class}\'',
u'flowLabel: \'{flow_label}\'',
u'payloadLength: \'{payload_length}\'',
u'nextHeader: \'{next_header}\'',
u'hopLimit: \'{hop_limit}\'',
u'JNetPcapIpv6String: \'{jnetpcap_ip6}\'',
u'KrakenIpv6String: \'{kraken_ip6}\'']
class RubanetraMsnActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:msn_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.MsnActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'account: \'{account}\'',
u'chat: \'{chat}\'']
class RubanetraNetbiosActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:Netbios_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.NetbiosActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'datagramPacket: \'{datagram_packet}\'',
u'namePacket: \'{name_packet}\'']
class RubanetraPop3ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:pop3_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Pop3Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'subType: \'{sub_type}\'',
u'header: \'{header}\'',
u'data: \'{data}\'',
u'command: \'{command}\'',
u'response: \'{response}\'']
class RubanetraSmtpCommandActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:smtp_command_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpCommandActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'command: \'{command}\'',
u'parameter: \'{parameter}\'']
class RubanetraSmtpReplyActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:smtp_reply_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpReplyActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'code: \'{code}\'',
u'message: \'{message}\'']
class RubanetraSmtpSendActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:smtp_send_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpSendActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'header: \'{header}\'',
u'data: \'{data}\'']
class RubanetraSnmpv1ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:snmpv1_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Snmpv1Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'pdu: \'{pdu}\'',
u'sourceSocketAddress: \'{source_socket_address}\'',
u'destinationSocketAddress: \'{destination_socket_address}\'']
class RubanetraSnmpv2ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:snmpv2_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Snmpv2Activity'
FORMAT_STRING_PIECES = RubanetraSnmpv1ActivityFormatter.FORMAT_STRING_PIECES
class RubanetraTcpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:tcp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.TcpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'sourcePort: \'{source_port}\'',
u'destinationPort: \'{destination_port}\'',
u'sequenceNumber: \'{sequence_number}\'',
u'acknowledgeNumber: \'{acknowledge_number}\'',
u'relativeSequenceNumber: \'{relative_sequence_number}\'',
u'relativeAcknowledgeNumber: \'{relative_acknowledge_number}\'',
u'dataOffset: \'{data_offset}\'',
u'controlBits: \'{control_bits}\'',
u'windowSize: \'{window_size}\'',
u'checksum: \'{checksum}\'',
u'urgentPointer: \'{urgent_pointer}\'',
u'tcpLength: \'{tcp_length}\'',
u'options: \'{options}\'',
u'padding: \'{padding}\'',
u'syn: \'{syn}\'',
u'ack: \'{ack}\'',
u'psh: \'{psh}\'',
u'fin: \'{fin}\'',
u'rst: \'{rst}\'',
u'urg: \'{urg}\'',
u'direction: \'{direction}\'',
u'clientState: \'{client_state}\'',
u'serverState: \'{server_state}\'',
u'JNetPcapTcpString: \'{jnetpcap_tcp}\'',
u'sourceAddress: \'{source_address}\'',
u'destinationAddress: \'{destination_address}\'',
u'sourceSocketAddress: \'{source_socket_address}\'',
u'destinationSocketAddress: \'{destination_socket_address}\'']
class RubanetraTelnetActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:telnet_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.TelnetActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'subType: \'{sub_type}\'',
u'command: \'{command}\'',
u'option: \'{option}\'',
u'ansiMode: \'{ansi_mode}\'',
u'arguments: \'{arguments}\'',
u'text: \'{text}\'',
u'title: \'{title}\'']
class RubanetraTlsActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:tls_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.TlsActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'clientToServerTraffic: \'{client_to_server_traffic}\'',
u'serverToClientTraffic: \'{server_to_client_traffic}\'']
class RubanetraUdpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:udp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.UdpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'sourcePort: \'{source_port}\'',
u'destinationPort: \'{destination_port}\'',
u'length: \'{length}\'',
u'checksum: \'{checksum}\'',
u'JNetPcapUdpString: \'{jnetpcap_udp}\'',
u'sourceSocketAddress: \'{source_socket_address}\'',
u'destinationSocketAddress: \'{destination_socket_address}\'']
class RubanetraOpenSSHActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:open_ssh_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.OpenSSHActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'clientToServerTraffic: \'{client_to_server_traffic}\'',
u'serverToClientTraffic: \'{server_to_client_traffic}\'']
class RubanetraDropboxTlsActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:dropbox_tls_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.DropboxActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'clientAddress: \'{client_address}\'',
u'serverAddress: \'{server_address}\'']
class RubanetraSpiderOakActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:spideroak_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SpiderOakActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'clientAddress: \'{client_address}\'',
u'serverAddress: \'{server_address}\'']
class RubanetraSkypePayloadActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:skype_payload_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SkypePayloadActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'sourceObjectId: \'{source_object_id}\'',
u'destinationObjectId: \'{destination_object_id}\'',
u'sourceHost: \'{source_host}\'',
u'destinationHost: \'{destination_host}\'']
class JavaInstantFormatter(interface.EventFormatter):
""" Formatter for a Java Instant """
DATA_TYPE = 'java:time:Instant'
SOURCE_SHORT = 'JAVA'
SOURCE_LONG = 'java.time.Instant'
FORMAT_STRING = (
    u'epoch_seconds: \'{instant_epoch_seconds}\', nano: \'{instant_nano}\'')
FORMAT_STRING_SHORT = u'{instant_epoch_seconds}.{instant_nano}'
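# Hedged usage sketch: combining the assumed Instant attributes into a
# single UTC timestamp; the values below are illustrative only.
if __name__ == '__main__':
  import datetime
  print(datetime.datetime.utcfromtimestamp(
      1417651200 + 500000000 / 1e9))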

Some files were not shown because too many files have changed in this diff