Import from old repository

Stefan
2020-04-06 18:48:34 +02:00
commit 0da6783a45
762 changed files with 103065 additions and 0 deletions
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -0,0 +1,202 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The file format classifier."""
# TODO: rewrite most of the classifier in C and integrate with the code in:
# plaso/classifier
import gzip
import logging
import os
import tarfile
import zipfile
import zlib
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.lib import errors
class Classifier(object):
"""Class that defines the file format classifier."""
_MAGIC_VALUES = {
'ZIP': {'length': 4, 'offset': 0, 'values': ['P', 'K', '\x03', '\x04']},
'TAR': {'length': 5, 'offset': 257, 'values': ['u', 's', 't', 'a', 'r']},
'GZ': {'length': 2, 'offset': 0, 'values': ['\x1f', '\x8b']},
}
# TODO: Remove this logic when the classifier is ready.
# This is only used temporarily until files can be classified.
magic_max_length = 0
# Defines the maximum depth into a file (for SmartOpenFiles).
MAX_FILE_DEPTH = 3
@classmethod
def _SmartOpenFile(cls, file_entry):
"""Return a generator for all pathspec protobufs extracted from a file.
If the file is compressed then extract all members and include
them into the processing queue.
Args:
file_entry: The file entry object.
Yields:
A path specification (instance of dfvfs.PathSpec) of embedded file
entries.
"""
file_object = file_entry.GetFileObject()
# TODO: Remove when the classifier gets deployed. Then we
# call the classifier here and use that for the definition (and
# then we forward the classifier definition in the pathspec
# protobuf).
file_object.seek(0, os.SEEK_SET)
if not cls.magic_max_length:
for magic_value in cls._MAGIC_VALUES.values():
cls.magic_max_length = max(
cls.magic_max_length,
magic_value['length'] + magic_value['offset'])
header = file_object.read(cls.magic_max_length)
file_classification = ''
# Go over each magic value defined and compare the bytes read
# at the corresponding offset. If all bytes match we have identified
# the file format and can move on.
for m_value, m_dict in cls._MAGIC_VALUES.items():
length = m_dict['length'] + m_dict['offset']
if len(header) < length:
continue
offset = m_dict['offset']
magic = m_dict['values']
if header[offset:offset + len(magic)] == ''.join(magic):
file_classification = m_value
break
# TODO: refactor the file type specific code into sub functions.
if file_classification == 'ZIP':
try:
file_object.seek(0, os.SEEK_SET)
zip_file = zipfile.ZipFile(file_object, 'r')
# TODO: Make this a more "sane" check, and perhaps
# not entirely skip the file if it has this particular
# ending, but for now, this both slows the tool down
# considerably and also makes it more unstable.
_, _, filename_extension = file_entry.name.rpartition(u'.')
if filename_extension in [u'jar', u'sym', u'xpi']:
file_object.close()
logging.debug(
u'Unsupported ZIP sub type: {0:s} detected in file: {1:s}'.format(
filename_extension, file_entry.path_spec.comparable))
return
for info in zip_file.infolist():
if info.file_size > 0:
logging.debug(
u'Including: {0:s} from ZIP into process queue.'.format(
info.filename))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_ZIP, location=info.filename,
parent=file_entry.path_spec)
except zipfile.BadZipfile:
pass
elif file_classification == 'GZ':
try:
type_indicator = file_entry.path_spec.type_indicator
if type_indicator == definitions.TYPE_INDICATOR_GZIP:
raise errors.SameFileType
file_object.seek(0, os.SEEK_SET)
gzip_file = gzip.GzipFile(fileobj=file_object, mode='rb')
_ = gzip_file.read(4)
gzip_file.close()
logging.debug((
u'Including: {0:s} as GZIP compressed stream into process '
u'queue.').format(file_entry.name))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_GZIP, parent=file_entry.path_spec)
except (IOError, zlib.error, errors.SameFileType):
pass
# TODO: Add BZ2 support.
elif file_classification == 'TAR':
try:
file_object.seek(0, os.SEEK_SET)
tar_file = tarfile.open(fileobj=file_object, mode='r')
for name_info in tar_file.getmembers():
if not name_info.isfile():
continue
name = name_info.path
logging.debug(
u'Including: {0:s} from TAR into process queue.'.format(name))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TAR, location=name,
parent=file_entry.path_spec)
except tarfile.ReadError:
pass
file_object.close()
@classmethod
def SmartOpenFiles(cls, file_entry, depth=0):
"""Generate a list of all available PathSpecs extracted from a file.
Args:
file_entry: A file entry object.
depth: Incrementing number that defines the current depth into
a file (file inside a ZIP file is depth 1, file inside a tar.gz
would be of depth 2).
Yields:
A file entry object (instance of dfvfs.FileEntry).
"""
if depth >= cls.MAX_FILE_DEPTH:
return
for path_spec in cls._SmartOpenFile(file_entry):
sub_file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
if sub_file_entry is None:
logging.debug(
u'Unable to open file: {0:s}'.format(path_spec.comparable))
continue
yield sub_file_entry
depth += 1
for sub_file_entry in cls.SmartOpenFiles(sub_file_entry, depth=depth):
yield sub_file_entry
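A short usage sketch of the classifier above, not part of the commit: SmartOpenFiles takes a dfvfs file entry and yields sub file entries for the members of supported containers. The local archive path is hypothetical.
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
# Hypothetical local archive; any resolvable OS path specification works.
os_path_spec = path_spec_factory.Factory.NewPathSpec(
    definitions.TYPE_INDICATOR_OS, location=u'/tmp/sample.zip')
file_entry = path_spec_resolver.Resolver.OpenFileEntry(os_path_spec)
# Yields dfvfs file entries for embedded files, recursing up to
# Classifier.MAX_FILE_DEPTH levels (a file inside a tar.gz is depth 2).
for sub_file_entry in Classifier.SmartOpenFiles(file_entry):
  print sub_file_entry.path_spec.comparable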
@@ -0,0 +1,421 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generic collector that supports both file system and image files."""
import hashlib
import logging
import os
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.lib import errors as dfvfs_errors
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.engine import queue
from plaso.lib import errors
class Collector(queue.ItemQueueProducer):
"""Class that implements a collector object."""
def __init__(
self, process_queue, source_path, source_path_spec,
resolver_context=None):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
source_path: Path of the source file or directory.
source_path_spec: The source path specification (instance of
dfvfs.PathSpec) as determined by the file system
scanner. The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None.
"""
super(Collector, self).__init__(process_queue)
self._filter_find_specs = None
self._fs_collector = FileSystemCollector(process_queue)
self._resolver_context = resolver_context
# TODO: remove the need to pass source_path
self._source_path = os.path.abspath(source_path)
self._source_path_spec = source_path_spec
self._vss_stores = None
def __enter__(self):
"""Enters a with statement."""
return self
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Exits a with statement."""
return
def _ProcessImage(self, volume_path_spec, find_specs=None):
"""Processes a volume within a storage media image.
Args:
volume_path_spec: The path specification of the volume containing
the file system.
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
if find_specs:
logging.debug(u'Collecting from image file: {0:s} with filter'.format(
self._source_path))
else:
logging.debug(u'Collecting from image file: {0:s}'.format(
self._source_path))
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
try:
file_system = path_spec_resolver.Resolver.OpenFileSystem(
path_spec, resolver_context=self._resolver_context)
except IOError as exception:
logging.error(
u'Unable to open file system with error: {0:s}'.format(exception))
return
try:
self._fs_collector.Collect(
file_system, path_spec, find_specs=find_specs)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
if find_specs:
logging.debug(u'Collection from image with filter FAILED.')
else:
logging.debug(u'Collection from image FAILED.')
return
if self._abort:
return
if self._vss_stores:
self._ProcessVSS(volume_path_spec, find_specs=find_specs)
if find_specs:
logging.debug(u'Collection from image with filter COMPLETED.')
else:
logging.debug(u'Collection from image COMPLETED.')
def _ProcessVSS(self, volume_path_spec, find_specs=None):
"""Processes a VSS volume within a storage media image.
Args:
volume_path_spec: The path specification of the volume containing
the file system.
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
logging.info(u'Processing VSS.')
vss_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location=u'/',
parent=volume_path_spec)
vss_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
vss_path_spec, resolver_context=self._resolver_context)
number_of_vss = vss_file_entry.number_of_sub_file_entries
# In plaso 1 represents the first store index, while in dfvfs and
# pyvshadow 0 represents the first store index, so 1 is subtracted.
vss_store_range = [store_nr - 1 for store_nr in self._vss_stores]
for store_index in vss_store_range:
if self._abort:
return
if find_specs:
logging.info((
u'Collecting from VSS volume: {0:d} out of: {1:d} '
u'with filter').format(store_index + 1, number_of_vss))
else:
logging.info(u'Collecting from VSS volume: {0:d} out of: {1:d}'.format(
store_index + 1, number_of_vss))
vss_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_VSHADOW, store_index=store_index,
parent=volume_path_spec)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=vss_path_spec)
file_system = path_spec_resolver.Resolver.OpenFileSystem(
path_spec, resolver_context=self._resolver_context)
try:
self._fs_collector.Collect(
file_system, path_spec, find_specs=find_specs)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
if find_specs:
logging.debug(
u'Collection from VSS store: {0:d} with filter FAILED.'.format(
store_index + 1))
else:
logging.debug(u'Collection from VSS store: {0:d} FAILED.'.format(
store_index + 1))
return
if find_specs:
logging.debug(
u'Collection from VSS store: {0:d} with filter COMPLETED.'.format(
store_index + 1))
else:
logging.debug(u'Collection from VSS store: {0:d} COMPLETED.'.format(
store_index + 1))
def Collect(self):
"""Collects files from the source."""
source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
self._source_path_spec, resolver_context=self._resolver_context)
if not source_file_entry:
logging.warning(u'No files to collect.')
self.SignalEndOfInput()
return
if (not source_file_entry.IsDirectory() and
not source_file_entry.IsFile() and
not source_file_entry.IsDevice()):
raise errors.CollectorError(
u'Source path: {0:s} not a device, file or directory.'.format(
self._source_path))
type_indicator = self._source_path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
if source_file_entry.IsFile():
self.ProduceItem(self._source_path_spec)
else:
file_system = path_spec_resolver.Resolver.OpenFileSystem(
self._source_path_spec, resolver_context=self._resolver_context)
try:
self._fs_collector.Collect(
file_system, self._source_path_spec,
find_specs=self._filter_find_specs)
except (dfvfs_errors.AccessError,
dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
else:
self._ProcessImage(
self._source_path_spec.parent, find_specs=self._filter_find_specs)
self.SignalEndOfInput()
def SetCollectDirectoryMetadata(self, collect_directory_metadata):
"""Sets the collect directory metadata flag.
Args:
collect_directory_metadata: Boolean value to indicate to collect
directory metadata.
"""
self._fs_collector.SetCollectDirectoryMetadata(collect_directory_metadata)
def SetFilter(self, filter_find_specs):
"""Sets the collection filter find specifications.
Args:
filter_find_specs: List of filter find specifications (instances of
dfvfs.FindSpec).
"""
self._filter_find_specs = filter_find_specs
def SetVssInformation(self, vss_stores):
"""Sets the Volume Shadow Snapshots (VSS) information.
This function will enable VSS collection.
Args:
vss_stores: The range of VSS stores to include in the collection,
where 1 represents the first store.
"""
self._vss_stores = vss_stores
def SignalAbort(self):
"""Signals the producer to abort."""
super(Collector, self).SignalAbort()
self._fs_collector.SignalAbort()
class FileSystemCollector(queue.ItemQueueProducer):
"""Class that implements a file system collector object."""
def __init__(self, process_queue):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
"""
super(FileSystemCollector, self).__init__(process_queue)
self._collect_directory_metadata = True
self._duplicate_file_check = False
self._hashlist = {}
self.number_of_file_entries = 0
def __enter__(self):
"""Enters a with statement."""
return self
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Exits a with statement."""
return
def _CalculateNTFSTimeHash(self, file_entry):
"""Return a hash value calculated from a NTFS file's metadata.
Args:
file_entry: The file entry (instance of TSKFileEntry).
Returns:
A hash value (string) that can be used to determine if a file's timestamp
value has changed.
"""
stat_object = file_entry.GetStat()
ret_hash = hashlib.md5()
ret_hash.update('atime:{0:d}.{1:d}'.format(
getattr(stat_object, 'atime', 0),
getattr(stat_object, 'atime_nano', 0)))
ret_hash.update('crtime:{0:d}.{1:d}'.format(
getattr(stat_object, 'crtime', 0),
getattr(stat_object, 'crtime_nano', 0)))
ret_hash.update('mtime:{0:d}.{1:d}'.format(
getattr(stat_object, 'mtime', 0),
getattr(stat_object, 'mtime_nano', 0)))
ret_hash.update('ctime:{0:d}.{1:d}'.format(
getattr(stat_object, 'ctime', 0),
getattr(stat_object, 'ctime_nano', 0)))
return ret_hash.hexdigest()
def _ProcessDirectory(self, file_entry):
"""Processes a directory and extract its metadata if necessary."""
# Need to do a breadth-first search otherwise we'll hit the Python
# maximum recursion depth.
sub_directories = []
for sub_file_entry in file_entry.sub_file_entries:
if self._abort:
return
try:
if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink():
continue
except dfvfs_errors.BackEndError as exception:
logging.warning(
u'Unable to process file: {0:s} with error: {1:s}'.format(
sub_file_entry.path_spec.comparable.replace(
u'\n', u';'), exception))
continue
# For TSK-based file entries only, ignore the virtual /$OrphanFiles
# directory.
if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK:
if file_entry.IsRoot() and sub_file_entry.name == u'$OrphanFiles':
continue
if sub_file_entry.IsDirectory():
# This check is here to improve performance by not producing
# path specifications that don't get processed.
if self._collect_directory_metadata:
self.ProduceItem(sub_file_entry.path_spec)
self.number_of_file_entries += 1
sub_directories.append(sub_file_entry)
elif sub_file_entry.IsFile():
# If we are dealing with a VSS we want to calculate a hash
# value based on available timestamps, compare that to previously
# calculated hash values, and only include the file in the queue if
# the hash does not match.
if self._duplicate_file_check:
hash_value = self._CalculateNTFSTimeHash(sub_file_entry)
inode = getattr(sub_file_entry.path_spec, 'inode', 0)
if inode in self._hashlist:
if hash_value in self._hashlist[inode]:
continue
self._hashlist.setdefault(inode, []).append(hash_value)
self.ProduceItem(sub_file_entry.path_spec)
self.number_of_file_entries += 1
for sub_file_entry in sub_directories:
if self._abort:
return
try:
self._ProcessDirectory(sub_file_entry)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
def Collect(self, file_system, path_spec, find_specs=None):
"""Collects files from the file system.
Args:
file_system: The file system (instance of dfvfs.FileSystem).
path_spec: The path specification (instance of dfvfs.PathSpec).
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
if find_specs:
searcher = file_system_searcher.FileSystemSearcher(file_system, path_spec)
for path_spec in searcher.Find(find_specs=find_specs):
if self._abort:
return
self.ProduceItem(path_spec)
self.number_of_file_entries += 1
else:
file_entry = file_system.GetFileEntryByPathSpec(path_spec)
self._ProcessDirectory(file_entry)
def SetCollectDirectoryMetadata(self, collect_directory_metadata):
"""Sets the collect directory metadata flag.
Args:
collect_directory_metadata: Boolean value to indicate to collect
directory metadata.
"""
self._collect_directory_metadata = collect_directory_metadata
@@ -0,0 +1,354 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The unit tests for the generic collector object."""
import logging
import os
import shutil
import tempfile
import unittest
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.engine import collector
from plaso.engine import queue
from plaso.engine import single_process
from plaso.engine import utils as engine_utils
class TempDirectory(object):
"""A self cleaning temporary directory."""
def __init__(self):
"""Initializes the temporary directory."""
super(TempDirectory, self).__init__()
self.name = u''
def __enter__(self):
"""Make this work with the 'with' statement."""
self.name = tempfile.mkdtemp()
return self.name
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Make this work with the 'with' statement."""
shutil.rmtree(self.name, True)
class TestCollectorQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements a test collector queue consumer."""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(TestCollectorQueueConsumer, self).__init__(queue_object)
self.path_specs = []
def _ConsumeItem(self, path_spec):
"""Consumes an item callback for ConsumeItems.
Args:
path_spec: a path specification (instance of dfvfs.PathSpec).
"""
self.path_specs.append(path_spec)
@property
def number_of_path_specs(self):
"""The number of path specifications."""
return len(self.path_specs)
def GetFilePaths(self):
"""Retrieves a list of file paths from the path specifications."""
file_paths = []
for path_spec in self.path_specs:
location = getattr(path_spec, 'location', None)
if location is not None:
file_paths.append(location)
return file_paths
class CollectorTestCase(unittest.TestCase):
"""The collector test case."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data')
# Show full diff results; maxDiff is part of TestCase so it does not
# follow our naming conventions.
maxDiff = None
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
A path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
class CollectorTest(CollectorTestCase):
"""Tests for the collector."""
def testFileSystemCollection(self):
"""Test collection on the file system."""
test_files = [
self._GetTestFilePath([u'syslog.tgz']),
self._GetTestFilePath([u'syslog.zip']),
self._GetTestFilePath([u'syslog.bz2']),
self._GetTestFilePath([u'wtmp.1'])]
with TempDirectory() as dirname:
for a_file in test_files:
shutil.copy(a_file, dirname)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, dirname, path_spec,
resolver_context=resolver_context)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)
def testFileSystemWithFilterCollection(self):
"""Test collection on the file system with a filter."""
dirname = u'.'
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
temp_file.write('/test_data/testdir/filter_.+.txt\n')
temp_file.write('/test_data/.+evtx\n')
temp_file.write('/AUTHORS\n')
temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, dirname, path_spec,
resolver_context=resolver_context)
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
test_collector.SetFilter(find_specs)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning((
u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
filter_name, exception))
# Two files matching test_data/testdir/filter_*.txt, AUTHORS
# and test_data/System.evtx: 4 files in total.
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)
paths = test_collector_queue_consumer.GetFilePaths()
current_directory = os.getcwd()
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_1.txt')
self.assertTrue(expected_path in paths)
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_2.txt')
self.assertFalse(expected_path in paths)
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_3.txt')
self.assertTrue(expected_path in paths)
expected_path = os.path.join(
current_directory, u'AUTHORS')
self.assertTrue(expected_path in paths)
def testImageCollection(self):
"""Test collection on a storage media image file.
This image has two files:
+ logs/hidden.zip
+ logs/sys.tgz
The hidden.zip file contains one file, syslog, which is the same
file that is stored in sys.tgz.
The end result should therefore be:
+ logs/hidden.zip (unchanged)
+ logs/hidden.zip:syslog (the text file extracted out)
+ logs/sys.tgz (unchanged)
+ logs/sys.tgz (read as a GZIP file, so not compressed)
+ logs/sys.tgz:syslog.gz (A GZIP file from the TAR container)
+ logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)
This means that the collection script should collect 6 files in total.
"""
test_file = self._GetTestFilePath([u'syslog_image.dd'])
volume_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, test_file, path_spec,
resolver_context=resolver_context)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 3)
def testImageWithFilterCollection(self):
"""Test collection on a storage media image file with a filter."""
test_file = self._GetTestFilePath([u'ímynd.dd'])
volume_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
temp_file.write('/a_directory/.+zip\n')
temp_file.write('/a_directory/another.+\n')
temp_file.write('/passwords.txt\n')
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, test_file, path_spec,
resolver_context=resolver_context)
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
test_collector.SetFilter(find_specs)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning((
u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
filter_name, exception))
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2)
paths = test_collector_queue_consumer.GetFilePaths()
# path_specs[0]
# type: TSK
# file_path: '/a_directory/another_file'
# container_path: 'test_data/ímynd.dd'
# image_offset: 0
self.assertEquals(paths[0], u'/a_directory/another_file')
# path_specs[1]
# type: TSK
# file_path: '/passwords.txt'
# container_path: 'test_data/ímynd.dd'
# image_offset: 0
self.assertEquals(paths[1], u'/passwords.txt')
class BuildFindSpecsFromFileTest(unittest.TestCase):
"""Tests for the BuildFindSpecsFromFile function."""
def testBuildFindSpecsFromFile(self):
"""Tests the BuildFindSpecsFromFile function."""
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
# 2 hits.
temp_file.write('/test_data/testdir/filter_.+.txt\n')
# A single hit.
temp_file.write('/test_data/.+evtx\n')
# A single hit.
temp_file.write('/AUTHORS\n')
temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
# This should not compile properly since it is missing file information.
temp_file.write('failing/\n')
# This should not fail during initial loading, but fail later on.
temp_file.write('bad re (no close on that parenthesis/file\n')
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning(
u'Unable to remove temporary file: {0:s} with error: {1:s}'.format(
filter_name, exception))
self.assertEquals(len(find_specs), 4)
dirname = u'.'
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
searcher = file_system_searcher.FileSystemSearcher(
file_system, path_spec)
path_spec_generator = searcher.Find(find_specs=find_specs)
self.assertNotEquals(path_spec_generator, None)
path_specs = list(path_spec_generator)
# One evtx, one AUTHORS, two filter_*.txt files, total 4 files.
self.assertEquals(len(path_specs), 4)
with self.assertRaises(IOError):
_ = engine_utils.BuildFindSpecsFromFile('thisfiledoesnotexist')
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,319 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The processing engine."""
import abc
import logging
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.artifacts import knowledge_base
from plaso.engine import collector
from plaso.engine import queue
from plaso.lib import errors
from plaso.preprocessors import interface as preprocess_interface
from plaso.preprocessors import manager as preprocess_manager
class BaseEngine(object):
"""Class that defines the processing engine base."""
def __init__(self, collection_queue, storage_queue, parse_error_queue):
"""Initialize the engine object.
Args:
collection_queue: the collection queue object (instance of Queue).
storage_queue: the storage queue object (instance of Queue).
parse_error_queue: the parser error queue object (instance of Queue).
"""
self._collection_queue = collection_queue
self._enable_debug_output = False
self._enable_profiling = False
self._event_queue_producer = queue.ItemQueueProducer(storage_queue)
self._filter_object = None
self._mount_path = None
self._open_files = False
self._parse_error_queue = parse_error_queue
self._parse_error_queue_producer = queue.ItemQueueProducer(
parse_error_queue)
self._profiling_sample_rate = 1000
self._source = None
self._source_path_spec = None
self._source_file_entry = None
self._text_prepend = None
self.knowledge_base = knowledge_base.KnowledgeBase()
self.storage_queue = storage_queue
def CreateCollector(
self, include_directory_stat, vss_stores=None, filter_find_specs=None,
resolver_context=None):
"""Creates a collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
include_directory_stat: Boolean value to indicate whether directory
stat information should be collected.
vss_stores: Optional list of VSS stores to include in the collection,
where 1 represents the first store. Set to None if no
VSS stores should be processed. The default is None.
filter_find_specs: Optional list of filter find specifications (instances
of dfvfs.FindSpec). The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
A collector object (instance of Collector).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
collector_object = collector.Collector(
self._collection_queue, self._source, self._source_path_spec,
resolver_context=resolver_context)
collector_object.SetCollectDirectoryMetadata(include_directory_stat)
if vss_stores:
collector_object.SetVssInformation(vss_stores)
if filter_find_specs:
collector_object.SetFilter(filter_find_specs)
return collector_object
@abc.abstractmethod
def CreateExtractionWorker(self, worker_number):
"""Creates an extraction worker object.
Args:
worker_number: A number that identifies the worker.
Returns:
An extraction worker (instance of worker.ExtractionWorker).
"""
def GetSourceFileSystemSearcher(self, resolver_context=None):
"""Retrieves the file system searcher of the source.
Args:
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
The file system searcher object (instance of dfvfs.FileSystemSearcher).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
file_system = path_spec_resolver.Resolver.OpenFileSystem(
self._source_path_spec, resolver_context=resolver_context)
type_indicator = self._source_path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
mount_point = self._source_path_spec
else:
mount_point = self._source_path_spec.parent
return file_system_searcher.FileSystemSearcher(file_system, mount_point)
def PreprocessSource(self, platform, resolver_context=None):
"""Preprocesses the source and fills the preprocessing object.
Args:
platform: string that indicates the platform (operating system).
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
"""
searcher = self.GetSourceFileSystemSearcher(
resolver_context=resolver_context)
if not platform:
platform = preprocess_interface.GuessOS(searcher)
self.knowledge_base.platform = platform
preprocess_manager.PreprocessPluginsManager.RunPlugins(
platform, searcher, self.knowledge_base)
def SetEnableDebugOutput(self, enable_debug_output):
"""Enables or disables debug output.
Args:
enable_debug_output: boolean value to indicate if the debug output
should be enabled.
"""
self._enable_debug_output = enable_debug_output
def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
"""Enables or disables profiling.
Args:
enable_profiling: boolean value to indicate if the profiling
should be enabled.
profiling_sample_rate: optional integer indicating the profiling sample
rate. The value contains the number of files
processed. The default value is 1000.
"""
self._enable_profiling = enable_profiling
self._profiling_sample_rate = profiling_sample_rate
def SetFilterObject(self, filter_object):
"""Sets the filter object.
Args:
filter_object: the filter object (instance of objectfilter.Filter).
"""
self._filter_object = filter_object
def SetMountPath(self, mount_path):
"""Sets the mount path.
Args:
mount_path: string containing the mount path.
"""
self._mount_path = mount_path
# TODO: rename this mode.
def SetOpenFiles(self, open_files):
"""Sets the open files mode.
Args:
open_files: boolean value to indicate if the worker should scan for
file entries inside files.
"""
self._open_files = open_files
def SetSource(self, source_path_spec, resolver_context=None):
"""Sets the source.
Args:
source_path_spec: The source path specification (instance of
dfvfs.PathSpec) as determined by the file system
scanner. The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Raises:
BadConfigOption: if source cannot be set.
"""
path_spec = source_path_spec
while path_spec.parent:
path_spec = path_spec.parent
# Note that source should be used for output purposes only.
self._source = getattr(path_spec, 'location', u'')
self._source_path_spec = source_path_spec
self._source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
self._source_path_spec, resolver_context=resolver_context)
if not self._source_file_entry:
raise errors.BadConfigOption(
u'No such device, file or directory: {0:s}.'.format(self._source))
if (not self._source_file_entry.IsDirectory() and
not self._source_file_entry.IsFile() and
not self._source_file_entry.IsDevice()):
raise errors.CollectorError(
u'Source path: {0:s} not a device, file or directory.'.format(
self._source))
if self._source_path_spec.type_indicator in [
dfvfs_definitions.TYPE_INDICATOR_OS,
dfvfs_definitions.TYPE_INDICATOR_FAKE]:
if self._source_file_entry.IsFile():
logging.debug(u'Starting a collection on a single file.')
# No need for multiple workers when parsing a single file.
elif not self._source_file_entry.IsDirectory():
raise errors.BadConfigOption(
u'Source: {0:s} has to be a file or directory.'.format(
self._source))
# TODO: remove this functionality.
def SetTextPrepend(self, text_prepend):
"""Sets the text prepend.
Args:
text_prepend: string that contains the text to prepend to every
event object.
"""
self._text_prepend = text_prepend
def SignalAbort(self):
"""Signals the engine to abort."""
logging.warning(u'Signalled abort.')
self._event_queue_producer.SignalEndOfInput()
self._parse_error_queue_producer.SignalEndOfInput()
def SignalEndOfInputStorageQueue(self):
"""Signals the storage queue no input remains."""
self._event_queue_producer.SignalEndOfInput()
self._parse_error_queue_producer.SignalEndOfInput()
def SourceIsDirectory(self):
"""Determines if the source is a directory.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_file_entry:
raise RuntimeError(u'Missing source.')
return (not self.SourceIsStorageMediaImage() and
self._source_file_entry.IsDirectory())
def SourceIsFile(self):
"""Determines if the source is a file.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_file_entry:
raise RuntimeError(u'Missing source.')
return (not self.SourceIsStorageMediaImage() and
self._source_file_entry.IsFile())
def SourceIsStorageMediaImage(self):
"""Determines if the source is storage media image file or device.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
return self._source_path_spec.type_indicator not in [
dfvfs_definitions.TYPE_INDICATOR_OS,
dfvfs_definitions.TYPE_INDICATOR_FAKE]
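The docstrings above imply a call order: SetSource must succeed before CreateCollector or GetSourceFileSystemSearcher, both of which raise RuntimeError otherwise. A minimal setup sketch, not part of the commit, using the SingleProcessEngine defined in a later file of this commit; the image path is hypothetical.
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context
from plaso.engine import single_process
resolver_context = context.Context()
engine_object = single_process.SingleProcessEngine()
# Hypothetical storage media image; a TSK path specification chained
# onto an OS path specification marks the source as an image.
os_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location=u'/cases/image.dd')
source_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
    parent=os_path_spec)
engine_object.SetSource(
    source_path_spec, resolver_context=resolver_context)
# SourceIsStorageMediaImage() is now True, so the collector will
# process the image (and optionally its VSS stores).
collector_object = engine_object.CreateCollector(
    False, resolver_context=resolver_context)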
@@ -0,0 +1,204 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Queue management implementation for Plaso.
This file contains an implementation of a queue used by plaso for
queue management.
The queue has been abstracted in order to provide support for different
implementations of the queueing mechanism, to support multi processing and
scalability.
"""
import abc
from plaso.lib import errors
class QueueEndOfInput(object):
"""Class that implements a queue end of input."""
class Queue(object):
"""Class that implements the queue interface."""
@abc.abstractmethod
def __len__(self):
"""Returns the estimated current number of items in the queue."""
@abc.abstractmethod
def IsEmpty(self):
"""Determines if the queue is empty."""
@abc.abstractmethod
def PushItem(self, item):
"""Pushes an item onto the queue."""
@abc.abstractmethod
def PopItem(self):
"""Pops an item off the queue."""
def SignalEndOfInput(self):
"""Signals the queue no input remains."""
self.PushItem(QueueEndOfInput())
class QueueConsumer(object):
"""Class that implements the queue consumer interface.
The consumer subscribes to updates on the queue.
"""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(QueueConsumer, self).__init__()
self._abort = False
self._queue = queue_object
def SignalAbort(self):
"""Signals the consumer to abort."""
self._abort = True
class QueueProducer(object):
"""Class that implements the queue producer interface.
The producer generates updates on the queue.
"""
def __init__(self, queue_object):
"""Initializes the queue producer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(QueueProducer, self).__init__()
self._abort = False
self._queue = queue_object
def SignalAbort(self):
"""Signals the producer to abort."""
self._abort = True
def SignalEndOfInput(self):
"""Signals the queue no input remains."""
self._queue.SignalEndOfInput()
class EventObjectQueueConsumer(QueueConsumer):
"""Class that implements the event object queue consumer.
The consumer subscribes to updates on the queue.
"""
@abc.abstractmethod
def _ConsumeEventObject(self, event_object, **kwargs):
"""Consumes an event object callback for ConsumeEventObjects."""
def ConsumeEventObjects(self, **kwargs):
"""Consumes the event object that are pushed on the queue.
This function will issue a callback to _ConsumeEventObject for every
event object (instance of EventObject) consumed from the queue.
Args:
kwargs: keyword arguments to pass to the _ConsumeEventObject callback.
"""
while not self._abort:
try:
item = self._queue.PopItem()
except errors.QueueEmpty:
break
if isinstance(item, QueueEndOfInput):
# Push the item back onto the queue to make sure all
# queue consumers are stopped.
self._queue.PushItem(item)
break
self._ConsumeEventObject(item, **kwargs)
self._abort = False
class ItemQueueConsumer(QueueConsumer):
"""Class that implements an item queue consumer.
The consumer subscribes to updates on the queue.
"""
@abc.abstractmethod
def _ConsumeItem(self, item):
"""Consumes an item callback for ConsumeItems.
Args:
item: the item object.
"""
def ConsumeItems(self):
"""Consumes the items that are pushed on the queue."""
while not self._abort:
try:
item = self._queue.PopItem()
except errors.QueueEmpty:
break
if isinstance(item, QueueEndOfInput):
# Push the item back onto the queue to make sure all
# queue consumers are stopped.
self._queue.PushItem(item)
break
self._ConsumeItem(item)
self._abort = False
class ItemQueueProducer(QueueProducer):
"""Class that implements an item queue producer.
The producer generates updates on the queue.
"""
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
return
def ProduceItem(self, item):
"""Produces an item onto the queue.
Args:
item: the item object.
"""
try:
self._queue.PushItem(item)
except errors.QueueFull:
self._FlushQueue()
def ProduceItems(self, items):
"""Produces items onto the queue.
Args:
items: a list or generator of item objects.
"""
for item in items:
self.ProduceItem(item)
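A minimal sketch of the producer/consumer protocol defined above, not part of the commit, using the SingleProcessQueue from a later file of this commit; ListConsumer is a hypothetical subclass.
from plaso.engine import queue
from plaso.engine import single_process
class ListConsumer(queue.ItemQueueConsumer):
  """Hypothetical consumer that collects consumed items in a list."""
  def __init__(self, queue_object):
    super(ListConsumer, self).__init__(queue_object)
    self.items = []
  def _ConsumeItem(self, item):
    self.items.append(item)
test_queue = single_process.SingleProcessQueue()
producer = queue.ItemQueueProducer(test_queue)
producer.ProduceItems([u'item1', u'item2'])
producer.SignalEndOfInput()  # pushes the QueueEndOfInput sentinel
consumer = ListConsumer(test_queue)
# ConsumeItems stops at the sentinel and pushes it back onto the queue
# so that any other consumers on the same queue also stop.
consumer.ConsumeItems()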
@@ -0,0 +1,366 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The single process processing engine."""
import collections
import logging
import pdb
from plaso.engine import collector
from plaso.engine import engine
from plaso.engine import queue
from plaso.engine import worker
from plaso.lib import errors
from plaso.parsers import context as parsers_context
class SingleProcessCollector(collector.Collector):
"""Class that implements a single process collector object."""
def __init__(
self, process_queue, source_path, source_path_spec,
resolver_context=None):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
source_path: Path of the source file or directory.
source_path_spec: The source path specification (instance of
dfvfs.PathSpec) as determined by the file system
scanner. The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None.
"""
super(SingleProcessCollector, self).__init__(
process_queue, source_path, source_path_spec,
resolver_context=resolver_context)
self._extraction_worker = None
self._fs_collector = SingleProcessFileSystemCollector(process_queue)
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
while not self._queue.IsEmpty():
logging.debug(u'Extraction worker started.')
self._extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
def SetExtractionWorker(self, extraction_worker):
"""Sets the extraction worker.
Args:
extraction_worker: the extraction worker object (instance of
EventExtractionWorker).
"""
self._extraction_worker = extraction_worker
self._fs_collector.SetExtractionWorker(extraction_worker)
class SingleProcessEngine(engine.BaseEngine):
"""Class that defines the single process engine."""
def __init__(self, maximum_number_of_queued_items=0):
"""Initialize the single process engine object.
Args:
maximum_number_of_queued_items: The maximum number of queued items.
The default is 0, which represents
no limit.
"""
collection_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
storage_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
parse_error_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
super(SingleProcessEngine, self).__init__(
collection_queue, storage_queue, parse_error_queue)
self._event_queue_producer = SingleProcessItemQueueProducer(storage_queue)
self._parse_error_queue_producer = SingleProcessItemQueueProducer(
parse_error_queue)
def CreateCollector(
self, include_directory_stat, vss_stores=None, filter_find_specs=None,
resolver_context=None):
"""Creates a collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
include_directory_stat: Boolean value to indicate whether directory
stat information should be collected.
vss_stores: Optional list of VSS stores to include in the collection,
where 1 represents the first store. Set to None if no
VSS stores should be processed. The default is None.
filter_find_specs: Optional list of filter find specifications (instances
of dfvfs.FindSpec). The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
A collector object (instance of Collector).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
collector_object = SingleProcessCollector(
self._collection_queue, self._source, self._source_path_spec,
resolver_context=resolver_context)
collector_object.SetCollectDirectoryMetadata(include_directory_stat)
if vss_stores:
collector_object.SetVssInformation(vss_stores)
if filter_find_specs:
collector_object.SetFilter(filter_find_specs)
return collector_object
def CreateExtractionWorker(self, worker_number):
"""Creates an extraction worker object.
Args:
worker_number: A number that identifies the worker.
Returns:
An extraction worker (instance of worker.ExtractionWorker).
"""
parser_context = parsers_context.ParserContext(
self._event_queue_producer, self._parse_error_queue_producer,
self.knowledge_base)
extraction_worker = SingleProcessEventExtractionWorker(
worker_number, self._collection_queue, self._event_queue_producer,
self._parse_error_queue_producer, parser_context)
extraction_worker.SetEnableDebugOutput(self._enable_debug_output)
# TODO: move profiler in separate object.
extraction_worker.SetEnableProfiling(
self._enable_profiling,
profiling_sample_rate=self._profiling_sample_rate)
if self._open_files:
extraction_worker.SetOpenFiles(self._open_files)
if self._filter_object:
extraction_worker.SetFilterObject(self._filter_object)
if self._mount_path:
extraction_worker.SetMountPath(self._mount_path)
if self._text_prepend:
extraction_worker.SetTextPrepend(self._text_prepend)
return extraction_worker
def ProcessSource(
self, collector_object, storage_writer, parser_filter_string=None):
"""Processes the source and extracts event objects.
Args:
collector_object: A collector object (instance of Collector).
storage_writer: A storage writer object (instance of BaseStorageWriter).
parser_filter_string: Optional parser filter string. The default is None.
"""
extraction_worker = self.CreateExtractionWorker(0)
extraction_worker.InitalizeParserObjects(
parser_filter_string=parser_filter_string)
# Set the extraction worker and storage writer values so that they
# can be accessed if the QueueFull exception is raised. This is
# needed in single process mode to prevent the queue consuming too
# much memory.
collector_object.SetExtractionWorker(extraction_worker)
self._event_queue_producer.SetStorageWriter(storage_writer)
self._parse_error_queue_producer.SetStorageWriter(storage_writer)
logging.debug(u'Processing started.')
logging.debug(u'Collection started.')
collector_object.Collect()
logging.debug(u'Collection stopped.')
logging.debug(u'Extraction worker started.')
extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
self._event_queue_producer.SignalEndOfInput()
logging.debug(u'Storage writer started.')
storage_writer.WriteEventObjects()
logging.debug(u'Storage writer stopped.')
# Reset the extraction worker and storage writer values to return
# the objects in their original state. This will prevent access
# to the extraction worker outside this function and allow it
# to be garbage collected.
self._event_queue_producer.SetStorageWriter(None)
self._parse_error_queue_producer.SetStorageWriter(None)
collector_object.SetExtractionWorker(None)
logging.debug(u'Processing completed.')
class SingleProcessEventExtractionWorker(worker.BaseEventExtractionWorker):
"""Class that defines the single process event extraction worker."""
def _DebugParseFileEntry(self):
"""Callback for debugging file entry parsing failures."""
pdb.post_mortem()
class SingleProcessFileSystemCollector(collector.FileSystemCollector):
"""Class that implements a single process file system collector object."""
def __init__(self, process_queue):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
"""
super(SingleProcessFileSystemCollector, self).__init__(process_queue)
self._extraction_worker = None
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
while not self._queue.IsEmpty():
logging.debug(u'Extraction worker started.')
self._extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
def SetExtractionWorker(self, extraction_worker):
"""Sets the extraction worker.
Args:
extraction_worker: the extraction worker object (instance of
EventExtractionWorker).
"""
self._extraction_worker = extraction_worker
class SingleProcessItemQueueProducer(queue.ItemQueueProducer):
"""Class that implements a single process item queue producer."""
def __init__(self, queue_object):
"""Initializes the queue producer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(SingleProcessItemQueueProducer, self).__init__(queue_object)
self._storage_writer = None
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
logging.debug(u'Storage writer started.')
self._storage_writer.WriteEventObjects()
logging.debug(u'Storage writer stopped.')
def SetStorageWriter(self, storage_writer):
"""Sets the storage writer.
Args:
storage_writer: the storage writer object (instance of
BaseStorageWriter).
"""
self._storage_writer = storage_writer
class SingleProcessQueue(queue.Queue):
"""Single process queue."""
def __init__(self, maximum_number_of_queued_items=0):
"""Initializes a single process queue object.
Args:
maximum_number_of_queued_items: The maximum number of queued items.
The default is 0, which represents
no limit.
"""
super(SingleProcessQueue, self).__init__()
# The Queue interface defines the maximum number of queued items to be
# 0 if unlimited as does the multi processing queue, but deque uses
# None to indicate no limit.
if maximum_number_of_queued_items == 0:
maximum_number_of_queued_items = None
# maxlen contains the maximum number of items allowed to be queued,
# where None represents unlimited.
self._queue = collections.deque(
maxlen=maximum_number_of_queued_items)
def __len__(self):
"""Returns the estimated current number of items in the queue."""
return len(self._queue)
def IsEmpty(self):
"""Determines if the queue is empty."""
return len(self._queue) == 0
def PushItem(self, item):
"""Pushes an item onto the queue.
Raises:
QueueFull: when the queue is full.
"""
number_of_items = len(self._queue)
# Deque will drop the first item in the queue when maxlen is exceeded.
if not self._queue.maxlen or number_of_items < self._queue.maxlen:
self._queue.append(item)
number_of_items += 1
if self._queue.maxlen and number_of_items == self._queue.maxlen:
raise errors.QueueFull
def PopItem(self):
"""Pops an item off the queue.
Raises:
QueueEmpty: when the queue is empty.
"""
try:
# Using popleft to have FIFO behavior.
return self._queue.popleft()
except IndexError:
raise errors.QueueEmpty
@@ -0,0 +1,133 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests the single process processing engine."""
import os
import unittest
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.helpers import file_system_searcher
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context
from plaso.engine import single_process
from plaso.engine import test_lib
from plaso.lib import errors
class SingleProcessQueueTest(unittest.TestCase):
"""Tests the single process queue."""
_ITEMS = frozenset(['item1', 'item2', 'item3', 'item4'])
def testPushPopItem(self):
"""Tests the PushItem and PopItem functions."""
test_queue = single_process.SingleProcessQueue()
for item in self._ITEMS:
test_queue.PushItem(item)
    self.assertEqual(len(test_queue), len(self._ITEMS))
test_queue.SignalEndOfInput()
test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
test_queue_consumer.ConsumeItems()
expected_number_of_items = len(self._ITEMS)
    self.assertEqual(
        test_queue_consumer.number_of_items, expected_number_of_items)
def testQueueEmpty(self):
"""Tests the queue raises the QueueEmpty exception."""
test_queue = single_process.SingleProcessQueue()
with self.assertRaises(errors.QueueEmpty):
test_queue.PopItem()
def testQueueFull(self):
"""Tests the queue raises the QueueFull exception."""
test_queue = single_process.SingleProcessQueue(
maximum_number_of_queued_items=5)
for item in self._ITEMS:
test_queue.PushItem(item)
with self.assertRaises(errors.QueueFull):
test_queue.PushItem('item5')
with self.assertRaises(errors.QueueFull):
test_queue.PushItem('item6')
test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
test_queue_consumer.ConsumeItems()
expected_number_of_items = len(self._ITEMS)
    self.assertEqual(
        test_queue_consumer.number_of_items, expected_number_of_items + 1)
class SingleProcessEngineTest(unittest.TestCase):
"""Tests for the engine object."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data')
def testEngine(self):
"""Test the engine functionality."""
resolver_context = context.Context()
test_engine = single_process.SingleProcessEngine(
maximum_number_of_queued_items=25000)
    self.assertNotEqual(test_engine, None)
source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
os_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
source_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=os_path_spec)
test_engine.SetSource(source_path_spec, resolver_context=resolver_context)
self.assertFalse(test_engine.SourceIsDirectory())
self.assertFalse(test_engine.SourceIsFile())
self.assertTrue(test_engine.SourceIsStorageMediaImage())
test_searcher = test_engine.GetSourceFileSystemSearcher(
resolver_context=resolver_context)
    self.assertNotEqual(test_searcher, None)
self.assertIsInstance(
test_searcher, file_system_searcher.FileSystemSearcher)
test_engine.PreprocessSource('Windows')
test_collector = test_engine.CreateCollector(
False, vss_stores=None, filter_find_specs=None,
resolver_context=resolver_context)
    self.assertNotEqual(test_collector, None)
self.assertIsInstance(
test_collector, single_process.SingleProcessCollector)
test_extraction_worker = test_engine.CreateExtractionWorker(0)
    self.assertNotEqual(test_extraction_worker, None)
self.assertIsInstance(
test_extraction_worker,
single_process.SingleProcessEventExtractionWorker)
if __name__ == '__main__':
unittest.main()
+71
@@ -0,0 +1,71 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine related functions and classes for testing."""
import os
import unittest
from plaso.engine import queue
class TestQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements the test queue consumer.
The queue consumer subscribes to updates on the queue.
"""
def __init__(self, test_queue):
"""Initializes the queue consumer.
Args:
test_queue: the test queue (instance of Queue).
"""
super(TestQueueConsumer, self).__init__(test_queue)
self.items = []
def _ConsumeItem(self, item):
"""Consumes an item callback for ConsumeItems."""
self.items.append(item)
@property
def number_of_items(self):
"""The number of items."""
return len(self.items)
class EngineTestCase(unittest.TestCase):
"""The unit test case for a front-end."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data')
# Show full diff results, part of TestCase so does not follow our naming
# conventions.
maxDiff = None
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
      The path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
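# For example (using a test file name that appears elsewhere in the test
# suite), _GetTestFilePath([u'ímynd.dd']) returns the path of the file
# 'ímynd.dd' inside the test data directory.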
+75
@@ -0,0 +1,75 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine utility functions."""
import logging
from dfvfs.helpers import file_system_searcher
from plaso.winreg import path_expander
def BuildFindSpecsFromFile(filter_file_path, pre_obj=None):
"""Returns a list of find specification from a filter file.
Args:
filter_file_path: A path to a file that contains find specifications.
pre_obj: A preprocessing object (instance of PreprocessObject). This is
optional but when provided takes care of expanding each segment.
"""
find_specs = []
if pre_obj:
expander = path_expander.WinRegistryKeyPathExpander()
with open(filter_file_path, 'rb') as file_object:
for line in file_object:
line = line.strip()
if line.startswith(u'#'):
continue
if pre_obj:
try:
line = expander.ExpandPath(line, pre_obj=pre_obj)
except KeyError as exception:
logging.error((
u'Unable to use collection filter line: {0:s} with error: '
u'{1:s}').format(line, exception))
continue
if not line.startswith(u'/'):
logging.warning((
            u'The filter string must be defined as an absolute path: '
u'{0:s}').format(line))
continue
_, _, file_path = line.rstrip().rpartition(u'/')
if not file_path:
logging.warning(
u'Unable to parse the filter string: {0:s}'.format(line))
continue
# Convert the filter paths into a list of path segments and strip
# the root path segment.
path_segments = line.split(u'/')
path_segments.pop(0)
find_specs.append(file_system_searcher.FindSpec(
location_regex=path_segments, case_sensitive=False))
return find_specs
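# A hypothetical filter file, for illustration only (the paths below are
# made up):
#
#   # Lines starting with # are ignored.
#   /Windows/System32/config/SAM
#   /Documents And Settings/.+/NTUSER.DAT
#
# Each non-comment line must be an absolute path whose segments are used
# as a case insensitive location regular expression, e.g.:
#
#   find_specs = BuildFindSpecsFromFile(u'windows_filter.txt')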
+352
@@ -0,0 +1,352 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The event extraction worker."""
import logging
import os
from dfvfs.resolver import context
from dfvfs.resolver import resolver as path_spec_resolver
try:
from guppy import hpy
except ImportError:
hpy = None
from plaso.engine import classifier
from plaso.engine import queue
from plaso.lib import errors
from plaso.parsers import manager as parsers_manager
class BaseEventExtractionWorker(queue.ItemQueueConsumer):
"""Class that defines the event extraction worker base.
This class is designed to watch a queue for path specifications of files
and directories (file entries) for which events need to be extracted.
The event extraction worker needs to determine if a parser suitable
for parsing a particular file is available. All extracted event objects
are pushed on a storage queue for further processing.
"""
def __init__(
self, identifier, process_queue, event_queue_producer,
parse_error_queue_producer, parser_context):
"""Initializes the event extraction worker object.
Args:
identifier: The identifier, usually an incrementing integer.
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
event_queue_producer: The event object queue producer (instance of
ItemQueueProducer).
parse_error_queue_producer: The parse error queue producer (instance of
ItemQueueProducer).
parser_context: A parser context object (instance of ParserContext).
"""
super(BaseEventExtractionWorker, self).__init__(process_queue)
self._enable_debug_output = False
self._identifier = identifier
self._open_files = False
self._parser_context = parser_context
self._filestat_parser_object = None
self._parser_objects = None
# We need a resolver context per process to prevent multi processing
# issues with file objects stored in images.
self._resolver_context = context.Context()
self._event_queue_producer = event_queue_producer
self._parse_error_queue_producer = parse_error_queue_producer
# Attributes that contain the current status of the worker.
self._current_working_file = u''
self._is_running = False
# Attributes for profiling.
self._enable_profiling = False
self._heapy = None
self._profiling_sample = 0
self._profiling_sample_rate = 1000
self._profiling_sample_file = u'{0!s}.hpy'.format(self._identifier)
def _ConsumeItem(self, path_spec):
"""Consumes an item callback for ConsumeItems.
Args:
path_spec: a path specification (instance of dfvfs.PathSpec).
"""
file_entry = path_spec_resolver.Resolver.OpenFileEntry(
path_spec, resolver_context=self._resolver_context)
if file_entry is None:
logging.warning(u'Unable to open file entry: {0:s}'.format(
path_spec.comparable))
return
try:
self.ParseFileEntry(file_entry)
except IOError as exception:
logging.warning(u'Unable to parse file: {0:s} with error: {1:s}'.format(
path_spec.comparable, exception))
def _DebugParseFileEntry(self):
"""Callback for debugging file entry parsing failures."""
return
def _ParseFileEntryWithParser(self, parser_object, file_entry):
"""Parses a file entry with a specific parser.
Args:
parser_object: A parser object (instance of BaseParser).
file_entry: A file entry object (instance of dfvfs.FileEntry).
Raises:
QueueFull: If a queue is full.
"""
try:
parser_object.Parse(self._parser_context, file_entry)
except errors.UnableToParseFile as exception:
logging.debug(u'Not a {0:s} file ({1:s}) - {2:s}'.format(
parser_object.NAME, file_entry.name, exception))
except errors.QueueFull:
raise
except IOError as exception:
logging.debug(
u'[{0:s}] Unable to parse: {1:s} with error: {2:s}'.format(
parser_object.NAME, file_entry.path_spec.comparable,
exception))
# Casting a wide net, catching all exceptions. Done to keep the worker
# running, despite the parser hitting errors, so the worker doesn't die
# if a single file is corrupted or there is a bug in a parser.
except Exception as exception:
logging.warning(
u'[{0:s}] Unable to process file: {1:s} with error: {2:s}.'.format(
parser_object.NAME, file_entry.path_spec.comparable,
exception))
logging.debug(
u'The path specification that caused the error: {0:s}'.format(
file_entry.path_spec.comparable))
logging.exception(exception)
if self._enable_debug_output:
self._DebugParseFileEntry()
def _ProfilingStart(self):
"""Starts the profiling."""
self._heapy.setrelheap()
self._profiling_sample = 0
try:
os.remove(self._profiling_sample_file)
except OSError:
pass
def _ProfilingStop(self):
"""Stops the profiling."""
self._ProfilingWriteSample()
def _ProfilingUpdate(self):
"""Updates the profiling."""
self._profiling_sample += 1
if self._profiling_sample >= self._profiling_sample_rate:
self._ProfilingWriteSample()
self._profiling_sample = 0
def _ProfilingWriteSample(self):
"""Writes a profiling sample to the sample file."""
heap = self._heapy.heap()
heap.dump(self._profiling_sample_file)
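  # With the default sample rate of 1000, _ProfilingUpdate dumps a guppy
  # heap snapshot to '<identifier>.hpy' after every 1000 file entries
  # that have been parsed.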
def GetStatus(self):
"""Returns a status dictionary."""
return {
'is_running': self._is_running,
'identifier': u'Worker_{0:d}'.format(self._identifier),
'current_file': self._current_working_file,
'counter': self._parser_context.number_of_events}
def InitalizeParserObjects(self, parser_filter_string=None):
"""Initializes the parser objects.
    The parser_filter_string is a simple comma separated value string that
    denotes a list of parser names to include and/or exclude. Each entry
    can have the value of:
    + Exact match of a list of parsers, or a preset (see
      plaso/frontend/presets.py for a full list of available presets).
    + The name of a single parser (case insensitive), e.g. msiecfparser.
    + A glob name for a single parser, e.g. '*msie*' (case insensitive).
Args:
parser_filter_string: Optional parser filter string. The default is None.
"""
self._parser_objects = parsers_manager.ParsersManager.GetParserObjects(
parser_filter_string=parser_filter_string)
for parser_object in self._parser_objects:
if parser_object.NAME == 'filestat':
self._filestat_parser_object = parser_object
break
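  # For illustration (hypothetical filter values): a string such as
  # u'msiecfparser' selects a single parser by name, while u'*msie*'
  # selects every parser whose name matches the glob, e.g.:
  #
  #   worker.InitalizeParserObjects(parser_filter_string=u'*msie*')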
def ParseFileEntry(self, file_entry):
"""Parses a file entry.
Args:
file_entry: A file entry object (instance of dfvfs.FileEntry).
"""
logging.debug(u'[ParseFileEntry] Parsing: {0:s}'.format(
file_entry.path_spec.comparable))
self._current_working_file = getattr(
file_entry.path_spec, u'location', file_entry.name)
if file_entry.IsDirectory() and self._filestat_parser_object:
self._ParseFileEntryWithParser(self._filestat_parser_object, file_entry)
elif file_entry.IsFile():
      # TODO: Do not run all parsers, only those that the classifier
      # identifies as applicable to the file.
for parser_object in self._parser_objects:
logging.debug(u'Trying to parse: {0:s} with parser: {1:s}'.format(
file_entry.name, parser_object.NAME))
self._ParseFileEntryWithParser(parser_object, file_entry)
logging.debug(u'[ParseFileEntry] Done parsing: {0:s}'.format(
file_entry.path_spec.comparable))
if self._enable_profiling:
self._ProfilingUpdate()
if self._open_files:
try:
for sub_file_entry in classifier.Classifier.SmartOpenFiles(file_entry):
if self._abort:
break
self.ParseFileEntry(sub_file_entry)
except IOError as exception:
logging.warning(
u'Unable to parse file: {0:s} with error: {1:s}'.format(
file_entry.path_spec.comparable, exception))
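  # Note: when the open files mode is set (see SetOpenFiles), ParseFileEntry
  # calls itself recursively for every embedded file entry that
  # classifier.Classifier.SmartOpenFiles yields.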
def Run(self):
"""Extracts event objects from file entries."""
self._parser_context.ResetCounters()
if self._enable_profiling:
self._ProfilingStart()
self._is_running = True
logging.info(
u'Worker {0:d} (PID: {1:d}) started monitoring process queue.'.format(
self._identifier, os.getpid()))
self.ConsumeItems()
logging.info(
u'Worker {0:d} (PID: {1:d}) stopped monitoring process queue.'.format(
self._identifier, os.getpid()))
self._current_working_file = u''
self._is_running = False
if self._enable_profiling:
self._ProfilingStop()
self._resolver_context.Empty()
def SetEnableDebugOutput(self, enable_debug_output):
"""Enables or disables debug output.
Args:
enable_debug_output: boolean value to indicate if the debug output
should be enabled.
"""
self._enable_debug_output = enable_debug_output
def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
"""Enables or disables profiling.
Args:
      enable_profiling: boolean value to indicate if the profiling
                        should be enabled.
profiling_sample_rate: optional integer indicating the profiling sample
rate. The value contains the number of files
processed. The default value is 1000.
"""
if hpy:
self._enable_profiling = enable_profiling
self._profiling_sample_rate = profiling_sample_rate
if self._enable_profiling and not self._heapy:
self._heapy = hpy()
def SetFilterObject(self, filter_object):
"""Sets the filter object.
Args:
filter_object: the filter object (instance of objectfilter.Filter).
"""
self._parser_context.SetFilterObject(filter_object)
def SetMountPath(self, mount_path):
"""Sets the mount path.
Args:
mount_path: string containing the mount path.
"""
self._parser_context.SetMountPath(mount_path)
# TODO: rename this mode.
def SetOpenFiles(self, open_files):
"""Sets the open files mode.
Args:
open_files: boolean value to indicate if the worker should scan for
file entries inside files.
"""
self._open_files = open_files
def SetTextPrepend(self, text_prepend):
"""Sets the text prepend.
Args:
text_prepend: string that contains the text to prepend to every
event object.
"""
self._parser_context.SetTextPrepend(text_prepend)
def SignalAbort(self):
"""Signals the worker to abort."""
super(BaseEventExtractionWorker, self).SignalAbort()
self._parser_context.SignalAbort()
@classmethod
def SupportsProfiling(cls):
"""Returns a boolean value to indicate if profiling is supported."""
return hpy is not None
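# A typical single worker lifecycle, sketched for illustration (the queue,
# producer and parser context objects are assumed to be created elsewhere):
#
#   worker = BaseEventExtractionWorker(
#       0, process_queue, event_queue_producer, parse_error_queue_producer,
#       parser_context)
#   worker.InitalizeParserObjects(parser_filter_string=None)
#   worker.Run()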