Import from old repository
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains the format classifier classes.
|
||||
|
||||
Plaso is a tool that extracts events from files on a file system.
|
||||
For this it either reads files from a mounted file system or from an image.
|
||||
It uses an exhaustive approach to determine parse events from a file, meaning
|
||||
that it passes the file first to parser A and if that fails it continues with
|
||||
parser B.
|
||||
|
||||
The classifier is designed to be able to more quickly determine the format of
|
||||
a file and limit the number of parsers part of the exhaustive approach.
|
||||
|
||||
The current version of the classifier uses signatures to identify file formats.
|
||||
Some signatures must always be defined at a specific offset, this is referred to
|
||||
as an offset-bound signature or bound for short. Other signatures are commonly
|
||||
found at a specific offset but not necessarily. The last form of signatures is
|
||||
unbound, meaning that they don't have a fixed or common location where they can
|
||||
be found.
|
||||
|
||||
A specification is a collection of signatures with additional metadata that
|
||||
defines a specific file format. These specifications are grouped into a store
|
||||
for ease of use, e.g. so that they can be read from a configuration file all
|
||||
at once.
|
||||
|
||||
The classifier requires a scanner to analyze the data in a file. The scanner
|
||||
uses the specifications in a store to scan for the signatures or a certain
|
||||
format.
|
||||
|
||||
The classifier allows for multiple methods of scanning a file:
|
||||
* full: the entire file is scanned. This is the default scanning method.
|
||||
* head-tail: only the beginning (head) and the end (tail) of the file is
|
||||
scanned. This approach is more efficient for larger files.
|
||||
The buffer size is used as the size of the data that is scanned.
|
||||
Smaller files are scanned entirely.
|
||||
|
||||
The classifier returns zero or more classifications which point to a format
|
||||
specification and the scan results for the signatures defined by
|
||||
the specification.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
class Classification(object):
  """A single format classification.

  A classification couples a format specification with the scan matches
  that were found for that specification.
  """

  def __init__(self, specification, scan_matches):
    """Initializes the classification.

    Args:
      specification: the format specification (instance of Specification).
      scan_matches: the list of scan matches (instances of _ScanMatch).
    """
    self._specification = specification
    self.scan_matches = scan_matches

  @property
  def identifier(self):
    """The identifier of the classified format."""
    specification = self._specification
    return specification.identifier

  @property
  def magic_types(self):
    """The magic types, or an empty list if there are none."""
    specification = self._specification
    return specification.magic_types

  @property
  def mime_types(self):
    """The MIME types, or an empty list if there are none."""
    specification = self._specification
    return specification.mime_types
|
||||
|
||||
|
||||
class Classifier(object):
  """Classifies formats in raw data.

  The classifier is initialized with one scanner, which performs the
  actual scanning of the data. Keeping the scanner separate allows it to
  be replaced by a more efficient alternative when necessary. After
  initialization the classifier can classify data in buffers, files or
  file-like objects.

  For an example of how the classifier is to be used see: classify.py.
  """

  # Default scan buffer size in bytes.
  BUFFER_SIZE = 16 * 1024 * 1024

  def __init__(self, scanner):
    """Initializes the classifier.

    Args:
      scanner: an instance of the signature scanner.
    """
    self._scanner = scanner

  def _GetClassifications(self, scan_results):
    """Builds classifications from the scan results.

    Multiple scan results for the same specification are combined into a
    single classification.

    Args:
      scan_results: a list containing instances of _ScanResult.

    Returns:
      a list of instances of Classification.
    """
    classifications = {}

    for result in scan_results:
      for match in result.scan_matches:
        logging.debug(
            u'scan match at offset: 0x{0:08x} specification: {1:s}'.format(
                match.total_data_offset, result.identifier))

      # Only the first scan result per identifier creates a classification.
      if result.identifier not in classifications:
        classifications[result.identifier] = Classification(
            result.specification, result.scan_matches)

    return classifications.values()

  def ClassifyBuffer(self, data, data_size):
    """Classifies the data in a buffer.

    Assumes all necessary data is available in the buffer.

    Args:
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.

    Returns:
      a list of classifications or an empty list.
    """
    state = self._scanner.StartScan()
    self._scanner.ScanBuffer(state, data, data_size)
    self._scanner.StopScan(state)

    return self._GetClassifications(state.GetResults())

  def ClassifyFileObject(self, file_object):
    """Classifies the data in a file-like object.

    Args:
      file_object: a file-like object.

    Returns:
      a list of classifier classifications or an empty list.
    """
    results = self._scanner.ScanFileObject(file_object)
    return self._GetClassifications(results)

  def ClassifyFile(self, filename):
    """Classifies the data in a file.

    Args:
      filename: the name of the file.

    Returns:
      a list of classifier classifications or an empty list.
    """
    with open(filename, 'rb') as file_object:
      return self.ClassifyFileObject(file_object)
|
||||
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains tests for the format classifier classes."""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import classifier
|
||||
from plaso.classifier import scanner
|
||||
from plaso.classifier import test_lib
|
||||
|
||||
|
||||
class ClassifierTest(unittest.TestCase):
  """Tests for the Classifier class."""

  def setUp(self):
    """Sets up the specification store and the test file paths."""
    self._store = test_lib.CreateSpecificationStore()

    self._test_file1 = os.path.join('test_data', 'NTUSER.DAT')
    self._test_file2 = os.path.join('test_data', 'syslog.zip')

  def testClassifyFileWithScanner(self):
    """Tests ClassifyFile using the scan tree-based scanner."""
    test_scanner = scanner.Scanner(self._store)

    for test_file in [self._test_file1, self._test_file2]:
      test_classifier = classifier.Classifier(test_scanner)
      classifications = test_classifier.ClassifyFile(test_file)
      self.assertEqual(len(classifications), 1)

      # TODO: assert the contents of the classification.

  def testClassifyFileWithOffsetBoundScanner(self):
    """Tests ClassifyFile using the offset-bound scanner."""
    test_scanner = scanner.OffsetBoundScanner(self._store)

    for test_file in [self._test_file1, self._test_file2]:
      test_classifier = classifier.Classifier(test_scanner)
      classifications = test_classifier.ClassifyFile(test_file)
      self.assertEqual(len(classifications), 1)

      # TODO: assert the contents of the classification.
|
||||
|
||||
|
||||
# Run the tests when the module is invoked directly.
if __name__ == "__main__":
  unittest.main()
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains a small classify test program."""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import logging
|
||||
|
||||
from plaso.classifier import classifier
|
||||
from plaso.classifier import scanner
|
||||
from plaso.classifier import test_lib
|
||||
|
||||
|
||||
def Main():
  """Entry point of the classify test program.

  Parses the command line arguments, builds a scanner from the test
  specification store and prints the classifications found for each
  input file.
  """
  args_parser = argparse.ArgumentParser(
      description='Classify test program.')

  # Note: argparse requires a callable for the type argument; the previous
  # type='choice' was optparse syntax and made argument parsing fail.
  # Restricting the accepted values is done by choices alone.
  args_parser.add_argument(
      '-t', '--type', metavar='TYPE', action='store',
      dest='scanner_type', choices=['scan-tree', 'scan_tree'],
      default='scan-tree', help='The scanner type')

  args_parser.add_argument(
      '-v', '--verbose', action='store_true', dest='verbose', default=False,
      help='Print verbose output')

  args_parser.add_argument(
      'filenames', nargs='+', action='store', metavar='FILENAMES',
      default=None, help='The input filename(s) to classify.')

  options = args_parser.parse_args()

  if options.verbose:
    logging.basicConfig(level=logging.DEBUG)

  # Expand glob patterns into the actual list of files to classify.
  files_to_classify = []
  for input_glob in options.filenames:
    files_to_classify += glob.glob(input_glob)

  store = test_lib.CreateSpecificationStore()

  if options.scanner_type not in ['scan-tree', 'scan_tree']:
    # Defensive check; argparse already enforces choices above.
    print(u'Unsupported scanner type defaulting to: scan-tree')

  scan = scanner.Scanner(store)
  classify = classifier.Classifier(scan)

  for input_filename in files_to_classify:
    classifications = classify.ClassifyFile(input_filename)

    print(u'File: {0:s}'.format(input_filename))
    if not classifications:
      print(u'No classifications found.')
    else:
      print(u'Classifications:')
      for classification in classifications:
        print(u'\tformat: {0:s}'.format(classification.identifier))

    print(u'')
|
||||
|
||||
|
||||
# Script entry point.
if __name__ == '__main__':
  Main()
|
||||
@@ -0,0 +1,308 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""The patterns classes used by the scan tree-based format scanner."""
|
||||
|
||||
|
||||
class _ByteValuePatterns(object):
|
||||
"""Class that implements a mapping between byte value and patterns.
|
||||
|
||||
The byte value patterns are used in the scan tree-based format scanner
|
||||
to map a byte value to one or more patterns.
|
||||
"""
|
||||
|
||||
def __init__(self, byte_value):
|
||||
"""Initializes the pattern table (entry) byte value.
|
||||
|
||||
Args:
|
||||
byte_value: the byte value that maps the patterns in the table.
|
||||
"""
|
||||
super(_ByteValuePatterns, self).__init__()
|
||||
self.byte_value = byte_value
|
||||
self.patterns = {}
|
||||
|
||||
def __unicode__(self):
|
||||
"""Retrieves a string representation of the byte value patterns."""
|
||||
return u'0x{0:02x} {1!s}'.format(ord(self.byte_value), self.patterns)
|
||||
|
||||
def AddPattern(self, pattern):
|
||||
"""Adds a pattern.
|
||||
|
||||
Args:
|
||||
pattern: the pattern (instance of Pattern).
|
||||
|
||||
Raises:
|
||||
ValueError: if the table entry already contains a pattern
|
||||
with the same identifier.
|
||||
"""
|
||||
if pattern.identifier in self.patterns:
|
||||
raise ValueError(u'Pattern {0:s} is already defined.'.format(
|
||||
pattern.identifier))
|
||||
|
||||
self.patterns[pattern.identifier] = pattern
|
||||
|
||||
def ToDebugString(self, indentation_level=1):
|
||||
"""Converts the byte value pattern into a debug string."""
|
||||
indentation = u' ' * indentation_level
|
||||
|
||||
header = u'{0:s}byte value: 0x{1:02x}\n'.format(
|
||||
indentation, ord(self.byte_value))
|
||||
|
||||
entries = u''.join([u'{0:s} patterns: {1:s}\n'.format(
|
||||
indentation, identifier) for identifier in self.patterns])
|
||||
|
||||
return u''.join([header, entries, u'\n'])
|
||||
|
||||
|
||||
class _SkipTable(object):
|
||||
"""Class that implements a skip table.
|
||||
|
||||
The skip table is used in the scan tree-based format scanner to determine
|
||||
the skip value for the Boyer–Moore–Horspool search.
|
||||
"""
|
||||
|
||||
def __init__(self, skip_pattern_length):
|
||||
"""Initializes the skip table.
|
||||
|
||||
Args:
|
||||
skip_pattern_length: the (maximum) skip pattern length.
|
||||
"""
|
||||
super(_SkipTable, self).__init__()
|
||||
self._skip_value_per_byte_value = {}
|
||||
self.skip_pattern_length = skip_pattern_length
|
||||
|
||||
def __getitem__(self, key):
|
||||
"""Retrieves a specific skip value.
|
||||
|
||||
Args:
|
||||
key: the byte value within the skip table.
|
||||
|
||||
Returns:
|
||||
the skip value for the key or the maximim skip value
|
||||
if no corresponding key was found.
|
||||
"""
|
||||
if key in self._skip_value_per_byte_value:
|
||||
return self._skip_value_per_byte_value[key]
|
||||
return self.skip_pattern_length
|
||||
|
||||
def SetSkipValue(self, byte_value, skip_value):
|
||||
"""Sets a skip value.
|
||||
|
||||
Args:
|
||||
byte_value: the corresponding byte value.
|
||||
skip_value: the number of bytes to skip.
|
||||
|
||||
Raises:
|
||||
ValueError: if byte value or skip value is out of bounds.
|
||||
"""
|
||||
if byte_value < 0 or byte_value > 255:
|
||||
raise ValueError(u'Invalid byte value, value out of bounds.')
|
||||
|
||||
if skip_value < 0 or skip_value >= self.skip_pattern_length:
|
||||
raise ValueError(u'Invalid skip value, value out of bounds.')
|
||||
|
||||
if (not byte_value in self._skip_value_per_byte_value or
|
||||
self._skip_value_per_byte_value[byte_value] > skip_value):
|
||||
self._skip_value_per_byte_value[byte_value] = skip_value
|
||||
|
||||
def ToDebugString(self):
|
||||
"""Converts the skip table into a debug string."""
|
||||
header = u'Byte value\tSkip value\n'
|
||||
|
||||
entries = u''.join([u'0x{0:02x}\t{1:d}\n'.format(
|
||||
byte_value, self._skip_value_per_byte_value[byte_value])
|
||||
for byte_value in self._skip_value_per_byte_value])
|
||||
|
||||
default = u'Default\t{0:d}\n'.format(self.skip_pattern_length)
|
||||
|
||||
return u''.join([header, entries, default, u'\n'])
|
||||
|
||||
|
||||
class Pattern(object):
  """A signature pattern within a format specification."""

  def __init__(self, signature_index, signature, specification):
    """Initializes the pattern.

    Args:
      signature_index: the index of the signature within the specification.
      signature: the signature (instance of Signature).
      specification: the specification (instance of Specification) that
                     contains the signature.
    """
    super(Pattern, self).__init__()
    self._signature_index = signature_index
    self.signature = signature
    self.specification = specification

  def __unicode__(self):
    """Returns a string representation of the pattern."""
    return self.identifier

  @property
  def expression(self):
    """The signature expression."""
    return self.signature.expression

  @property
  def identifier(self):
    """The pattern identifier.

    The identifier joins the specification identifier and the signature
    index with an underscore, since some scanner implementations are
    limited in what characters can be used in identifiers.
    """
    return u'{0:s}_{1:d}'.format(
        self.specification.identifier, self._signature_index)

  @property
  def is_bound(self):
    """Boolean value to indicate the signature is bound to an offset."""
    return self.signature.is_bound

  @property
  def offset(self):
    """The signature offset."""
    return self.signature.offset
|
||||
|
||||
|
||||
class PatternTable(object):
  """A table of patterns used to construct a scan tree.

  The pattern table is used by the scan tree-based format scanner to
  construct a scan tree. It contains either unbound patterns or patterns
  bound to a specific offset, together with the byte values that occur
  at every pattern offset.
  """

  def __init__(self, patterns, ignore_list, is_bound=None):
    """Initializes and builds the pattern table from patterns.

    Args:
      patterns: a list of the patterns.
      ignore_list: a list of pattern offsets to ignore.
      is_bound: optional boolean value to indicate if the signatures are
                bound to offsets. The default is None, which means the value
                should be ignored and both bound and unbound patterns are
                considered unbound.

    Raises:
      ValueError: if a signature pattern is too small to be useful (< 4).
    """
    super(PatternTable, self).__init__()
    self._byte_values_per_offset = {}
    self.largest_pattern_length = 0
    self.largest_pattern_offset = 0
    self.patterns = []
    self.smallest_pattern_length = 0
    self.smallest_pattern_offset = 0

    for pattern in patterns:
      if is_bound is not None and pattern.signature.is_bound != is_bound:
        continue

      pattern_length = len(pattern.expression)

      if pattern_length < 4:
        raise ValueError(u'Pattern too small to be useful.')

      # The smallest pattern length cannot be tracked with min() against
      # the initial value of 0, since 0 is smaller than any actual pattern
      # length (>= 4) and the value would never change.
      if (self.smallest_pattern_length == 0 or
          pattern_length < self.smallest_pattern_length):
        self.smallest_pattern_length = pattern_length
      self.largest_pattern_length = max(
          self.largest_pattern_length, pattern_length)

      self.patterns.append(pattern)

      self._AddPattern(pattern, ignore_list, is_bound)

  def _AddPattern(self, pattern, ignore_list, is_bound):
    """Adds the byte values per offset in the pattern to the table.

    Args:
      pattern: the pattern (instance of Pattern).
      ignore_list: a list of pattern offsets to ignore.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are
                considered unbound.
    """
    pattern_offset = pattern.offset if is_bound else 0

    # The first pattern initializes both boundary offsets; a min() against
    # the initial value of 0 would otherwise keep the smallest pattern
    # offset stuck at 0 for bound patterns with non-zero offsets.
    if len(self.patterns) == 1:
      self.smallest_pattern_offset = pattern_offset
      self.largest_pattern_offset = pattern_offset
    else:
      self.smallest_pattern_offset = min(
          self.smallest_pattern_offset, pattern_offset)
      self.largest_pattern_offset = max(
          self.largest_pattern_offset, pattern_offset)

    for byte_value in pattern.expression:
      if pattern_offset not in self._byte_values_per_offset:
        self._byte_values_per_offset[pattern_offset] = {}

      if pattern_offset not in ignore_list:
        byte_values = self._byte_values_per_offset[pattern_offset]

        if byte_value not in byte_values:
          byte_values[byte_value] = _ByteValuePatterns(byte_value)

        byte_value_patterns = byte_values[byte_value]

        byte_value_patterns.AddPattern(pattern)

      pattern_offset += 1

  @property
  def offsets(self):
    """The pattern offsets."""
    return self._byte_values_per_offset.keys()

  def GetByteValues(self, pattern_offset):
    """Returns the byte values for a specific pattern offset."""
    return self._byte_values_per_offset[pattern_offset]

  def GetSkipTable(self):
    """Retrieves the skip table for the patterns in the table.

    Returns:
      The skip table (instance of _SkipTable).
    """
    skip_table = _SkipTable(self.smallest_pattern_length)

    for pattern in self.patterns:
      if pattern.expression:
        skip_value = self.smallest_pattern_length

        for expression_index in range(0, self.smallest_pattern_length):
          skip_value -= 1
          skip_table.SetSkipValue(
              ord(pattern.expression[expression_index]), skip_value)

    return skip_table

  def ToDebugString(self):
    """Converts the pattern table into a debug string."""
    header = u'Pattern offset\tByte value(s)\n'
    entries = u''

    for pattern_offset in self._byte_values_per_offset:
      entries += u'{0:d}'.format(pattern_offset)

      byte_values = self._byte_values_per_offset[pattern_offset]

      for byte_value in byte_values:
        identifiers = u', '.join(
            [identifier for identifier in byte_values[byte_value].patterns])

        entries += u'\t0x{0:02x} ({1:s})'.format(ord(byte_value), identifiers)

      entries += u'\n'

    return u''.join([header, entries, u'\n'])
|
||||
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""The range list data type."""
|
||||
|
||||
|
||||
class Range(object):
  """A contiguous range defined by a start offset and a size."""

  def __init__(self, range_offset, range_size):
    """Initializes the range object.

    Args:
      range_offset: the range offset.
      range_size: the range size.

    Raises:
      ValueError: if the range offset or range size is negative.
    """
    if range_offset < 0:
      raise ValueError(u'Invalid range offset value.')

    if range_size < 0:
      raise ValueError(u'Invalid range size value.')

    super(Range, self).__init__()
    self.start_offset = range_offset
    self.size = range_size
    # The end offset is exclusive: start + size.
    self.end_offset = self.start_offset + self.size
|
||||
|
||||
|
||||
class RangeList(object):
  """A list of ranges kept ordered by start offset.

  Overlapping ranges are merged on insertion, so the list holds
  non-overlapping ranges.
  """

  def __init__(self):
    """Initializes the range list object."""
    super(RangeList, self).__init__()
    # The list of Range objects, kept sorted by start offset.
    self.ranges = []

  @property
  def number_of_ranges(self):
    """The number of ranges in the list."""
    return len(self.ranges)

  def GetSpanningRange(self):
    """Retrieves the range spanning the entire range list.

    Returns:
      A range (instance of Range) from the start of the first range to the
      end of the last range, or None if the list is empty.
    """
    if self.number_of_ranges == 0:
      return

    first_range = self.ranges[0]
    last_range = self.ranges[-1]
    range_size = last_range.end_offset - first_range.start_offset

    return Range(first_range.start_offset, range_size)

  def Insert(self, range_offset, range_size):
    """Inserts the range defined by the offset and size in the list.

    Note that overlapping ranges will be merged and that a range fully
    contained in an existing range is ignored.

    Args:
      range_offset: the range offset.
      range_size: the range size.

    Raises:
      RuntimeError: if the range cannot be inserted because both an insert
                    and a merge position were determined.
      ValueError: if the range offset or range size is not valid.
    """
    if range_offset < 0:
      raise ValueError(u'Invalid range offset value.')

    if range_size < 0:
      raise ValueError(u'Invalid range size value.')

    # Exactly one of these is set by the scan below: the position to insert
    # a new range at, or the index of an existing range to merge into.
    insert_index = None
    merge_index = None

    number_of_range_objects = len(self.ranges)

    range_end_offset = range_offset + range_size

    if number_of_range_objects == 0:
      insert_index = 0

    else:
      range_object_index = 0

      # Scan the sorted ranges for the first one that determines what to do
      # with the new range; each branch below ends the scan with break.
      for range_object in self.ranges:
        # Ignore negative ranges.
        if range_object.start_offset < 0:
          range_object_index += 1
          continue

        # Insert the range before an existing one.
        if range_end_offset < range_object.start_offset:
          insert_index = range_object_index
          break

        # Ignore the range since the existing one overlaps it.
        if (range_offset >= range_object.start_offset and
            range_end_offset <= range_object.end_offset):
          break

        # Merge the range since it overlaps the existing one at the end.
        if (range_offset >= range_object.start_offset and
            range_offset <= range_object.end_offset):
          merge_index = range_object_index
          break

        # Merge the range since it overlaps the existing one at the start.
        if (range_end_offset >= range_object.start_offset and
            range_end_offset <= range_object.end_offset):
          merge_index = range_object_index
          break

        # Merge the range since it overlaps the existing one.
        if (range_offset <= range_object.start_offset and
            range_end_offset >= range_object.end_offset):
          merge_index = range_object_index
          break

        range_object_index += 1

      # Insert the range after the last one; the scan ran off the end
      # without finding an insert or merge position.
      if range_object_index >= number_of_range_objects:
        insert_index = number_of_range_objects

    if insert_index is not None and merge_index is not None:
      raise RuntimeError(
          u'Unable to insert the range both insert and merge specified.')

    if insert_index is not None:
      self.ranges.insert(insert_index, Range(range_offset, range_size))

    elif merge_index is not None:
      range_object = self.ranges[merge_index]
      # Extend the existing range at the start when the new range starts
      # before it.
      if range_offset < range_object.start_offset:
        range_object.size += range_object.start_offset - range_offset
        range_object.start_offset = range_offset
      # Extend the existing range at the end when the new range ends
      # after it.
      if range_end_offset > range_object.end_offset:
        range_object.size += range_end_offset - range_object.end_offset
        range_object.end_offset = range_end_offset
|
||||
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for the range list."""
|
||||
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import range_list
|
||||
|
||||
|
||||
class RangeListTest(unittest.TestCase):
  """Class to test the range list."""

  def testInsertPositiveRanges(self):
    """Function to test the insert function using positive ranges.

    Inserts non-overlapping, overlapping and adjacent ranges and checks
    that the range list merges them as expected. Uses assertEqual since
    the assertEquals alias is deprecated (and removed in Python 3.12).
    """
    range_list_object = range_list.RangeList()

    # Test non-overlapping range.
    range_list_object.Insert(500, 100)
    self.assertEqual(range_list_object.number_of_ranges, 1)

    range_object = range_list_object.ranges[0]
    self.assertEqual(range_object.start_offset, 500)
    self.assertEqual(range_object.end_offset, 600)
    self.assertEqual(range_object.size, 100)

    # Test non-overlapping range.
    range_list_object.Insert(2000, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 2000)
    self.assertEqual(range_object.end_offset, 2100)
    self.assertEqual(range_object.size, 100)

    # Test range that overlaps with an existing range at the start.
    range_list_object.Insert(1950, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1950)
    self.assertEqual(range_object.end_offset, 2100)
    self.assertEqual(range_object.size, 150)

    # Test range that overlaps with an existing range at the end.
    range_list_object.Insert(2050, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1950)
    self.assertEqual(range_object.end_offset, 2150)
    self.assertEqual(range_object.size, 200)

    # Test non-overlapping range.
    range_list_object.Insert(1000, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1000)
    self.assertEqual(range_object.end_offset, 1100)
    self.assertEqual(range_object.size, 100)

    # Test range that aligns with an existing range at the end.
    range_list_object.Insert(1100, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1000)
    self.assertEqual(range_object.end_offset, 1200)
    self.assertEqual(range_object.size, 200)

    # Test range that aligns with an existing range at the start.
    range_list_object.Insert(900, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 900)
    self.assertEqual(range_object.end_offset, 1200)
    self.assertEqual(range_object.size, 300)

    # Test non-overlapping range.
    range_list_object.Insert(0, 100)
    self.assertEqual(range_list_object.number_of_ranges, 4)

    range_object = range_list_object.ranges[0]
    self.assertEqual(range_object.start_offset, 0)
    self.assertEqual(range_object.end_offset, 100)
    self.assertEqual(range_object.size, 100)

    # Test invalid ranges.
    with self.assertRaises(ValueError):
      range_list_object.Insert(-1, 100)

    with self.assertRaises(ValueError):
      range_list_object.Insert(3000, -100)
|
||||
|
||||
|
||||
# Run the module's tests when invoked directly.
if __name__ == '__main__':
  unittest.main()
|
||||
@@ -0,0 +1,744 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""The scan tree classes used by the scan tree-based format scanner."""
|
||||
|
||||
import logging
|
||||
|
||||
from plaso.classifier import patterns
|
||||
from plaso.classifier import range_list
|
||||
|
||||
|
||||
class _PatternWeights(object):
|
||||
"""Class that implements pattern weights."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initializes the pattern weights."""
|
||||
super(_PatternWeights, self).__init__()
|
||||
self._offsets_per_weight = {}
|
||||
self._weight_per_offset = {}
|
||||
|
||||
def AddOffset(self, pattern_offset):
|
||||
"""Adds a pattern offset and sets its weight to 0.
|
||||
|
||||
Args:
|
||||
pattern_offset: the pattern offset to add to the pattern weights.
|
||||
|
||||
Raises:
|
||||
ValueError: if the pattern weights already contains the pattern offset.
|
||||
"""
|
||||
if pattern_offset in self._weight_per_offset:
|
||||
raise ValueError(u'Pattern offset already set.')
|
||||
|
||||
self._weight_per_offset[pattern_offset] = 0
|
||||
|
||||
def AddWeight(self, pattern_offset, weight):
|
||||
"""Adds a weight for a specific pattern offset.
|
||||
|
||||
Args:
|
||||
pattern_offset: the pattern offset to add to the pattern weights.
|
||||
weight: the corresponding weight to add.
|
||||
|
||||
Raises:
|
||||
ValueError: if the pattern weights does not contain the pattern offset.
|
||||
"""
|
||||
if pattern_offset not in self._weight_per_offset:
|
||||
raise ValueError(u'Pattern offset not set.')
|
||||
|
||||
self._weight_per_offset[pattern_offset] += weight
|
||||
|
||||
if weight not in self._offsets_per_weight:
|
||||
self._offsets_per_weight[weight] = []
|
||||
|
||||
self._offsets_per_weight[weight].append(pattern_offset)
|
||||
|
||||
def GetLargestWeight(self):
|
||||
"""Retrieves the largest weight or 0 if none."""
|
||||
if self._offsets_per_weight:
|
||||
return max(self._offsets_per_weight)
|
||||
|
||||
return 0
|
||||
|
||||
def GetOffsetsForWeight(self, weight):
|
||||
"""Retrieves the list of offsets for a specific weight."""
|
||||
return self._offsets_per_weight[weight]
|
||||
|
||||
def GetWeightForOffset(self, pattern_offset):
|
||||
"""Retrieves the weight for a specific pattern offset."""
|
||||
return self._weight_per_offset[pattern_offset]
|
||||
|
||||
def ToDebugString(self):
|
||||
"""Converts the pattern weights into a debug string."""
|
||||
header1 = u'Pattern offset\tWeight\n'
|
||||
|
||||
entries1 = u''.join([u'{0:d}\t{1:d}\n'.format(
|
||||
pattern_offset, self._weight_per_offset[pattern_offset])
|
||||
for pattern_offset in self._weight_per_offset])
|
||||
|
||||
header2 = u'Weight\tPattern offset(s)\n'
|
||||
|
||||
entries2 = u''.join([u'{0:d}\t{1!s}\n'.format(
|
||||
weight, self._offsets_per_weight[weight])
|
||||
for weight in self._offsets_per_weight])
|
||||
|
||||
return u''.join([header1, entries1, u'\n', header2, entries2, u'\n'])
|
||||
|
||||
def SetWeight(self, pattern_offset, weight):
|
||||
"""Sets a weight for a specific pattern offset.
|
||||
|
||||
Args:
|
||||
pattern_offset: the pattern offset to set in the pattern weights.
|
||||
weight: the corresponding weight to set.
|
||||
|
||||
Raises:
|
||||
ValueError: if the pattern weights does not contain the pattern offset.
|
||||
"""
|
||||
if pattern_offset not in self._weight_per_offset:
|
||||
raise ValueError(u'Pattern offset not set.')
|
||||
|
||||
self._weight_per_offset[pattern_offset] = weight
|
||||
|
||||
if weight not in self._offsets_per_weight:
|
||||
self._offsets_per_weight[weight] = []
|
||||
|
||||
self._offsets_per_weight[weight].append(pattern_offset)
|
||||
|
||||
|
||||
class ScanTree(object):
  """Class that implements a scan tree.

  The scan tree is built from the signatures in a specification store and
  drives the scan tree-based format scanner.
  """

  # Byte values that occur frequently in typical file data; matching on
  # these provides little discriminating value when choosing tree nodes.
  _COMMON_BYTE_VALUES = frozenset(
      '\x00\x01\xff\t\n\r 0123456789'
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
      'abcdefghijklmnopqrstuvwxyz')

  # The offset must be positive, negative offsets are ignored.
  OFFSET_MODE_POSITIVE = 1
  # The offset must be negative, positive offsets are ignored.
  OFFSET_MODE_NEGATIVE = 2
  # The offset must be positive, an error is raised for negative offsets.
  OFFSET_MODE_POSITIVE_STRICT = 3
  # The offset must be negative, an error is raised for positive offsets.
  OFFSET_MODE_NEGATIVE_STRICT = 4

  def __init__(
      self, specification_store, is_bound,
      offset_mode=OFFSET_MODE_POSITIVE_STRICT):
    """Initializes and builds the scan tree.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.
      offset_mode: optional value to indicate how the signature offsets should
                   be handled. The default is that the offset must be positive
                   and an error is raised for negative offsets.
    """
    super(ScanTree, self).__init__()
    self.largest_length = 0
    self.pattern_list = []
    self.range_list = range_list.RangeList()
    self.root_node = None
    self.skip_table = None

    # First determine all the patterns from the specification store.
    self._BuildPatterns(specification_store, is_bound, offset_mode=offset_mode)

    # Next create the scan tree starting with the root node.
    ignore_list = []
    pattern_table = patterns.PatternTable(
        self.pattern_list, ignore_list, is_bound)

    if pattern_table.patterns:
      self.root_node = self._BuildScanTreeNode(
          pattern_table, ignore_list, is_bound)

      logging.debug(u'Scan tree:\n{0:s}'.format(
          self.root_node.ToDebugString()))

    # At the end the skip table is determined to provide for the
    # Boyer–Moore–Horspool skip value.
    self.skip_table = pattern_table.GetSkipTable()

    logging.debug(u'Skip table:\n{0:s}'.format(
        self.skip_table.ToDebugString()))

    self.largest_length = pattern_table.largest_pattern_length

  def _BuildPatterns(
      self, specification_store, is_bound,
      offset_mode=OFFSET_MODE_POSITIVE_STRICT):
    """Builds the list of patterns.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.
      offset_mode: optional value to indicate how the signature offsets should
                   be handled. The default is that the offset must be positive
                   and an error is raised for negative offsets.

    Raises:
      ValueError: if a signature offset invalid according to specified offset
                  mode or a signature pattern is too small to be useful (< 4).
    """
    self.pattern_list = []

    for specification in specification_store.specifications:
      signature_index = 0

      for signature in specification.signatures:
        if signature.expression:
          signature_offset = signature.offset if is_bound else 0
          signature_pattern_length = len(signature.expression)

          # Make sure signature offset is numeric.
          try:
            signature_offset = int(signature_offset)
          except (TypeError, ValueError):
            signature_offset = 0

          if signature_offset < 0:
            if offset_mode == self.OFFSET_MODE_POSITIVE:
              continue
            elif offset_mode == self.OFFSET_MODE_POSITIVE_STRICT:
              raise ValueError(u'Signature offset less than 0.')

            # The range list does not allow offsets to be negative and thus
            # the signature offset is turned into a positive equivalent.
            signature_offset *= -1

            # The signature size is subtracted to make sure the spanning
            # range will align with the original negative offset values.
            signature_offset -= signature_pattern_length

          elif signature_offset > 0:
            if offset_mode == self.OFFSET_MODE_NEGATIVE:
              continue
            elif offset_mode == self.OFFSET_MODE_NEGATIVE_STRICT:
              raise ValueError(u'Signature offset greater than 0.')

          if signature_pattern_length < 4:
            raise ValueError(u'Signature pattern smaller than 4.')

          pattern = patterns.Pattern(
              signature_index, signature, specification)
          self.pattern_list.append(pattern)
          self.range_list.Insert(signature_offset, signature_pattern_length)

          signature_index += 1

  def _BuildScanTreeNode(self, pattern_table, ignore_list, is_bound):
    """Builds a scan tree node.

    Args:
      pattern_table: a pattern table (instance of PatternTable).
      ignore_list: a list of pattern offsets to ignore
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.

    Raises:
      ValueError: if number of byte value patterns value out of bounds.

    Returns:
      A scan tree node (instance of ScanTreeNode).
    """
    # Make a copy of the lists because the function is going to alter them
    # and the changes must remain in scope of the function.
    pattern_list = list(pattern_table.patterns)
    ignore_list = list(ignore_list)

    similarity_weights = _PatternWeights()
    occurrence_weights = _PatternWeights()
    value_weights = _PatternWeights()

    for pattern_offset in pattern_table.offsets:
      similarity_weights.AddOffset(pattern_offset)
      occurrence_weights.AddOffset(pattern_offset)
      value_weights.AddOffset(pattern_offset)

      byte_values = pattern_table.GetByteValues(pattern_offset)
      number_of_byte_values = len(byte_values)

      if number_of_byte_values > 1:
        occurrence_weights.SetWeight(pattern_offset, number_of_byte_values)

      for byte_value in byte_values:
        byte_value_patterns = byte_values[byte_value]
        byte_value_weight = len(byte_value_patterns.patterns)

        if byte_value_weight > 1:
          similarity_weights.AddWeight(pattern_offset, byte_value_weight)

        # BUG FIX: the original tested byte_value_weight (an integer count)
        # against _COMMON_BYTE_VALUES (a set of byte characters), which is
        # never true for an int, so every byte value raised the value weight.
        # The intent is to favor offsets with uncommon byte values.
        if byte_value not in self._COMMON_BYTE_VALUES:
          value_weights.AddWeight(pattern_offset, 1)

    logging.debug(u'Pattern table:\n{0:s}'.format(
        pattern_table.ToDebugString()))
    logging.debug(u'Similarity weights:\n{0:s}'.format(
        similarity_weights.ToDebugString()))
    logging.debug(u'Occurrence weights:\n{0:s}'.format(
        occurrence_weights.ToDebugString()))
    logging.debug(u'Value weights:\n{0:s}'.format(
        value_weights.ToDebugString()))

    pattern_offset = self._GetMostSignificantPatternOffset(
        pattern_list, similarity_weights, occurrence_weights, value_weights)

    ignore_list.append(pattern_offset)

    # For the scan tree negative offsets are adjusted so that
    # the smallest pattern offset is 0.
    scan_tree_pattern_offset = pattern_offset
    if scan_tree_pattern_offset < 0:
      scan_tree_pattern_offset -= pattern_table.smallest_pattern_offset

    scan_tree_node = ScanTreeNode(scan_tree_pattern_offset)

    byte_values = pattern_table.GetByteValues(pattern_offset)

    for byte_value in byte_values:
      byte_value_patterns = byte_values[byte_value]

      logging.debug(u'{0:s}'.format(byte_value_patterns.ToDebugString()))

      number_of_byte_value_patterns = len(byte_value_patterns.patterns)

      if number_of_byte_value_patterns <= 0:
        raise ValueError(
            u'Invalid number of byte value patterns value out of bounds.')

      elif number_of_byte_value_patterns == 1:
        # A single pattern for this byte value: store the pattern directly.
        for identifier in byte_value_patterns.patterns:
          logging.debug(
              u'Adding pattern: {0:s} for byte value: 0x{1:02x}.'.format(
                  identifier, ord(byte_value)))

          scan_tree_node.AddByteValue(
              byte_value, byte_value_patterns.patterns[identifier])

      else:
        # Multiple patterns for this byte value: recurse into a sub node.
        pattern_table = patterns.PatternTable(
            byte_value_patterns.patterns.itervalues(), ignore_list, is_bound)

        scan_sub_node = self._BuildScanTreeNode(
            pattern_table, ignore_list, is_bound)

        logging.debug(
            u'Adding scan node for byte value: 0x{0:02x}\n{1:s}'.format(
                ord(byte_value), scan_sub_node.ToDebugString()))

        scan_tree_node.AddByteValue(ord(byte_value), scan_sub_node)

      # Patterns handled by this byte value no longer need a default value.
      for identifier in byte_value_patterns.patterns:
        logging.debug(u'Removing pattern: {0:s} from:\n{1:s}'.format(
            identifier, self._PatternsToDebugString(pattern_list)))

        pattern_list.remove(byte_value_patterns.patterns[identifier])

    logging.debug(u'Remaining patterns:\n{0:s}'.format(
        self._PatternsToDebugString(pattern_list)))

    number_of_patterns = len(pattern_list)

    if number_of_patterns == 1:
      logging.debug(u'Setting pattern: {0:s} for default value'.format(
          pattern_list[0].identifier))

      scan_tree_node.SetDefaultValue(pattern_list[0])

    elif number_of_patterns > 1:
      pattern_table = patterns.PatternTable(pattern_list, ignore_list, is_bound)

      scan_sub_node = self._BuildScanTreeNode(
          pattern_table, ignore_list, is_bound)

      logging.debug(u'Setting scan node for default value:\n{0:s}'.format(
          scan_sub_node.ToDebugString()))

      scan_tree_node.SetDefaultValue(scan_sub_node)

    return scan_tree_node

  def _GetMostSignificantPatternOffset(
      self, pattern_list, similarity_weights, occurrence_weights,
      value_weights):
    """Returns the most significant pattern offset.

    Args:
      pattern_list: a list of patterns
      similarity_weights: the similarity (pattern) weights.
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Raises:
      ValueError: when pattern is an empty list.

    Returns:
      a pattern offset.
    """
    if not pattern_list:
      raise ValueError(u'Missing pattern list.')

    pattern_offset = None
    number_of_patterns = len(pattern_list)

    if number_of_patterns == 1:
      pattern_offset = self._GetPatternOffsetForValueWeights(
          value_weights)

    elif number_of_patterns == 2:
      pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
          occurrence_weights, value_weights)

    elif number_of_patterns > 2:
      pattern_offset = self._GetPatternOffsetForSimilarityWeights(
          similarity_weights, occurrence_weights, value_weights)

    logging.debug(u'Largest weight offset: {0:d}'.format(pattern_offset))

    return pattern_offset

  def _GetPatternOffsetForOccurrenceWeights(
      self, occurrence_weights, value_weights):
    """Returns the most significant pattern offset based on the value weights.

    Args:
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Returns:
      a pattern offset.
    """
    debug_string = ""
    pattern_offset = None

    largest_weight = occurrence_weights.GetLargestWeight()
    logging.debug(u'Largest occurrence weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      occurrence_weight_offsets = occurrence_weights.GetOffsetsForWeight(
          largest_weight)
      number_of_occurrence_offsets = len(occurrence_weight_offsets)
    else:
      number_of_occurrence_offsets = 0

    if number_of_occurrence_offsets == 0:
      pattern_offset = self._GetPatternOffsetForValueWeights(
          value_weights)

    elif number_of_occurrence_offsets == 1:
      pattern_offset = occurrence_weight_offsets[0]

    else:
      largest_weight = 0
      largest_value_weight = 0

      for occurrence_offset in occurrence_weight_offsets:
        value_weight = value_weights.GetWeightForOffset(
            occurrence_offset)

        debug_string = (
            u'Occurrence offset: {0:d} value weight: {1:d}').format(
                occurrence_offset, value_weight)

        # BUG FIX: an offset of 0 is a valid selection; the original used
        # "not pattern_offset" which treated offset 0 as unset and kept
        # overwriting the selection on every iteration.
        if pattern_offset is None or largest_weight < value_weight:
          largest_weight = value_weight
          pattern_offset = occurrence_offset

          debug_string += u' largest value weight: {0:d}'.format(
              largest_value_weight)

        logging.debug(u'{0:s}'.format(debug_string))

    return pattern_offset

  def _GetPatternOffsetForSimilarityWeights(
      self, similarity_weights, occurrence_weights, value_weights):
    """Returns the most significant pattern offset.

    Args:
      similarity_weights: the similarity (pattern) weights.
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Returns:
      a pattern offset.
    """
    debug_string = ""
    pattern_offset = None

    largest_weight = similarity_weights.GetLargestWeight()
    logging.debug(u'Largest similarity weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      similarity_weight_offsets = similarity_weights.GetOffsetsForWeight(
          largest_weight)
      number_of_similarity_offsets = len(similarity_weight_offsets)
    else:
      number_of_similarity_offsets = 0

    if number_of_similarity_offsets == 0:
      pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
          occurrence_weights, value_weights)

    elif number_of_similarity_offsets == 1:
      pattern_offset = similarity_weight_offsets[0]

    else:
      largest_weight = 0
      largest_value_weight = 0

      for similarity_offset in similarity_weight_offsets:
        occurrence_weight = occurrence_weights.GetWeightForOffset(
            similarity_offset)

        debug_string = (
            u'Similarity offset: {0:d} occurrence weight: {1:d}').format(
                similarity_offset, occurrence_weight)

        # A tie on occurrence weight is broken by the value weight; reset
        # largest_weight to force re-selection when the value weight wins.
        if largest_weight > 0 and largest_weight == occurrence_weight:
          value_weight = value_weights.GetWeightForOffset(
              similarity_offset)

          debug_string += u' value weight: {0:d}'.format(value_weight)

          if largest_value_weight < value_weight:
            largest_weight = 0

        # BUG FIX: an offset of 0 is a valid selection; the original used
        # "not pattern_offset" which treated offset 0 as unset.
        if pattern_offset is None or largest_weight < occurrence_weight:
          largest_weight = occurrence_weight
          pattern_offset = similarity_offset

          largest_value_weight = value_weights.GetWeightForOffset(
              similarity_offset)

          debug_string += u' largest value weight: {0:d}'.format(
              largest_value_weight)

        logging.debug(u'{0:s}'.format(debug_string))

    return pattern_offset

  def _GetPatternOffsetForValueWeights(
      self, value_weights):
    """Returns the most significant pattern offset based on the value weights.

    Args:
      value_weights: the value (pattern) weights.

    Raises:
      RuntimeError: no value weight offset were found.

    Returns:
      a pattern offset.
    """
    largest_weight = value_weights.GetLargestWeight()
    logging.debug(u'Largest value weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      value_weight_offsets = value_weights.GetOffsetsForWeight(largest_weight)
      number_of_value_offsets = len(value_weight_offsets)
    else:
      number_of_value_offsets = 0

    if number_of_value_offsets == 0:
      raise RuntimeError(u'No value weight offsets found.')

    return value_weight_offsets[0]

  def _PatternsToDebugString(self, pattern_list):
    """Converts the list of patterns into a debug string."""
    entries = u', '.join([u'{0:s}'.format(pattern) for pattern in pattern_list])

    return u''.join([u'[', entries, u']'])
|
||||
|
||||
|
||||
class ScanTreeNode(object):
  """Class that implements a scan tree node."""

  def __init__(self, pattern_offset):
    """Initializes the scan tree node.

    Args:
      pattern_offset: the offset in the pattern to which the node
                      applies.
    """
    super(ScanTreeNode, self).__init__()
    # Maps an integer byte value to a scan object, which is either a
    # sub node (ScanTreeNode) or a pattern.
    self._byte_values = {}
    # Scan object to fall back on when no byte value matches.
    self.default_value = None
    # Set when this node is added to another node as a sub node.
    self.parent = None
    self.pattern_offset = pattern_offset

  def AddByteValue(self, byte_value, scan_object):
    """Adds a byte value.

    Args:
      byte_value: the corresponding byte value, either an integer or a
                  single character string.
      scan_object: the scan object, either a scan sub node or a pattern.

    Raises:
      ValueError: if byte value is out of bounds or if the node already
                  contains a scan object for the byte value.
    """
    # Normalize single character strings to their integer byte value.
    key = ord(byte_value) if isinstance(byte_value, str) else byte_value

    if key < 0 or key > 255:
      raise ValueError(u'Invalid byte value, value out of bounds.')

    if key in self._byte_values:
      raise ValueError(u'Byte value already set.')

    if isinstance(scan_object, ScanTreeNode):
      scan_object.parent = self

    self._byte_values[key] = scan_object

  def CompareByteValue(
      self, data, data_offset, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the bounded scan tree.

    This function will return partial matches on the data block
    boundary as long as the total data size has not been reached.

    Args:
      data: a buffer containing raw data.
      data_offset: the offset in the raw data in the buffer.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Returns:
      the resulting scan object which is either a ScanTreeNode or Pattern
      or None.

    Raises:
      RuntimeError: if the data offset, total data offset, total data size
                    or pattern offset value is out of bounds.
    """
    if data_offset < 0 or data_offset >= data_size:
      raise RuntimeError(u'Invalid data offset, value out of bounds.')

    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size, value out of bounds.')

    if total_data_offset < 0 or (
        total_data_size is not None and total_data_offset >= total_data_size):
      raise RuntimeError(u'Invalid total data offset, value out of bounds.')

    # Allow partial matches on the buffer boundary when the buffer reaches
    # the end of the total data.
    match_on_boundary = (
        total_data_size is not None and
        total_data_offset + data_size >= total_data_size)

    data_offset += self.pattern_offset

    if not match_on_boundary and data_offset >= data_size:
      raise RuntimeError(u'Invalid pattern offset value, out of bounds.')

    found_match = False
    scan_object = None

    if data_offset < data_size:
      data_byte_value = ord(data[data_offset])
      if data_byte_value in self._byte_values:
        found_match = True
        scan_object = self._byte_values[data_byte_value]

    if found_match:
      logging.debug(
          u'Scan tree node match at data offset: 0x{0:08x}.'.format(data_offset)
      )
    else:
      # No byte value match: use this node's default value, or walk up the
      # parent chain to the nearest ancestor that has one.
      scan_object = self.default_value

      if not scan_object:
        ancestor = self.parent
        while ancestor and not ancestor.default_value:
          ancestor = ancestor.parent

        scan_object = ancestor.default_value if ancestor else None

    return scan_object

  def SetDefaultValue(self, scan_object):
    """Sets the default (non-match) value.

    Args:
      scan_object: the scan object, either a scan sub node or a pattern.

    Raises:
      ValueError: if the default value is already set.
    """
    if self.default_value:
      raise ValueError(u'Default value already set.')

    self.default_value = scan_object

  def ToDebugString(self, indentation_level=1):
    """Converts the scan tree node into a debug string."""
    indentation = u' ' * indentation_level

    text_parts = [u'{0:s}pattern offset: {1:d}\n'.format(
        indentation, self.pattern_offset)]

    for byte_value in self._byte_values:
      scan_object = self._byte_values[byte_value]

      text_parts.append(u'{0:s}byte value: 0x{1:02x}\n'.format(
          indentation, byte_value))

      if isinstance(scan_object, ScanTreeNode):
        text_parts.append(u'{0:s}scan tree node:\n'.format(indentation))
        text_parts.append(scan_object.ToDebugString(indentation_level + 1))

      elif isinstance(scan_object, patterns.Pattern):
        text_parts.append(u'{0:s}pattern: {1:s}\n'.format(
            indentation, scan_object.identifier))

    text_parts.append(u'{0:s}default value:\n'.format(indentation))

    if isinstance(self.default_value, ScanTreeNode):
      text_parts.append(u'{0:s}scan tree node:\n'.format(indentation))
      text_parts.append(self.default_value.ToDebugString(indentation_level + 1))

    elif isinstance(self.default_value, patterns.Pattern):
      text_parts.append(u'{0:s}pattern: {1:s}\n'.format(
          indentation, self.default_value.identifier))

    text_parts.append(u'\n')
    return u''.join(text_parts)
|
||||
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains tests for the scan tree classes."""
|
||||
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import patterns
|
||||
from plaso.classifier import scan_tree
|
||||
from plaso.classifier import specification
|
||||
|
||||
|
||||
class ScanTreeNodeTest(unittest.TestCase):
  """Class to test the scan tree node."""

  def testAddByteValueWithPattern(self):
    """Function to test the add byte value with pattern function."""
    scan_tree_node = scan_tree.ScanTreeNode(0)

    regf_specification = specification.Specification('REGF')
    regf_specification.AddNewSignature('regf', offset=0)
    regf_signature = specification.Signature('regf', offset=0)
    regf_pattern = patterns.Pattern(0, regf_signature, regf_specification)

    esedb_specification = specification.Specification('ESEDB')
    esedb_specification.AddNewSignature('\xef\xcd\xab\x89', offset=4)
    esedb_signature = specification.Signature('\xef\xcd\xab\x89', offset=4)
    esedb_pattern = patterns.Pattern(0, esedb_signature, esedb_specification)

    scan_tree_node.AddByteValue('r', regf_pattern)
    scan_tree_node.AddByteValue('\xef', esedb_pattern)

    # Adding the same byte value twice or a value outside 0-255 should fail.
    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue('r', regf_pattern)

    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(-1, regf_pattern)

    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(256, regf_pattern)

  def testAddByteValueWithScanNode(self):
    """Function to test the add byte value with scan node function."""
    scan_tree_node = scan_tree.ScanTreeNode(0)
    scan_sub_node_0x41 = scan_tree.ScanTreeNode(1)
    scan_sub_node_0x80 = scan_tree.ScanTreeNode(1)

    scan_tree_node.AddByteValue(0x41, scan_sub_node_0x41)
    scan_tree_node.AddByteValue(0x80, scan_sub_node_0x80)

    # Adding the same byte value twice or a value outside 0-255 should fail.
    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(0x80, scan_sub_node_0x80)

    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(-1, scan_sub_node_0x80)

    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(256, scan_sub_node_0x80)
|
||||
|
||||
|
||||
# Run the scan tree node tests when this module is invoked directly.
if __name__ == '__main__':
  unittest.main()
|
||||
@@ -0,0 +1,749 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains the classes for a scan tree-based format scanner."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from plaso.classifier import patterns
|
||||
from plaso.classifier import range_list
|
||||
from plaso.classifier import scan_tree
|
||||
|
||||
|
||||
class _ScanMatch(object):
  """Class that implements a scan match.

  A scan match records where in the total data a pattern was found.
  """

  def __init__(self, total_data_offset, pattern):
    """Initializes the scan match.

    Args:
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.
      pattern: the pattern (instance of Pattern) that was matched.
    """
    super(_ScanMatch, self).__init__()
    self.pattern = pattern
    self.total_data_offset = total_data_offset

  @property
  def specification(self):
    """The format specification of the matched pattern."""
    return self.pattern.specification
|
||||
|
||||
|
||||
class _ScanResult(object):
  """Class that implements a scan result.

  A scan result groups all scan matches that belong to the same format
  specification.
  """

  def __init__(self, specification):
    """Initializes the scan result.

    Args:
      specification: the format specification (instance of Specification)
                     the scan matches apply to.
    """
    super(_ScanResult, self).__init__()
    self.specification = specification
    self.scan_matches = []

  @property
  def identifier(self):
    """The specification identifier."""
    return self.specification.identifier
|
||||
|
||||
|
||||
class ScanState(object):
  """Class that implements a scan state.

  The state progresses from start, through scanning, to stop. Matches can
  only be added while active (start or scanning); results can only be
  retrieved once stopped.
  """

  # The state definitions.
  _SCAN_STATE_START = 1
  _SCAN_STATE_SCANNING = 2
  _SCAN_STATE_STOP = 3

  def __init__(self, scan_tree_node, total_data_size=None):
    """Initializes the scan state.

    Args:
      scan_tree_node: the corresponding scan tree node or None.
      total_data_size: optional value to indicate the total data size.
                       The default is None.
    """
    super(ScanState, self).__init__()
    self._matches = []
    self.remaining_data = None
    self.remaining_data_size = 0
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_START
    self.total_data_offset = 0
    self.total_data_size = total_data_size

  def _RaiseIfNotActive(self):
    """Checks that the state is start or scanning.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    if self.state not in (
        self._SCAN_STATE_START, self._SCAN_STATE_SCANNING):
      raise RuntimeError(u'Unsupported scan state.')

  def AddMatch(self, total_data_offset, pattern):
    """Adds a match to an active scan state.

    Args:
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.
      pattern: the pattern matched.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self._matches.append(_ScanMatch(total_data_offset, pattern))

  def GetMatches(self):
    """Retrieves the matches collected during the scan.

    Returns:
      A list of scan matches (instances of _ScanMatch).

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    # Matches may only be read once scanning has stopped.
    if self.state != self._SCAN_STATE_STOP:
      raise RuntimeError(u'Unsupported scan state.')
    return self._matches

  def Reset(self, scan_tree_node):
    """Resets a stopped state back to start and clears the remaining data.

    Args:
      scan_tree_node: the corresponding scan tree node or None.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    # Only a stopped scan can be restarted.
    if self.state != self._SCAN_STATE_STOP:
      raise RuntimeError(u'Unsupported scan state.')

    self.remaining_data = None
    self.remaining_data_size = 0
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_START

  def Scanning(self, scan_tree_node, total_data_offset):
    """Sets an active state to scanning.

    Args:
      scan_tree_node: the active scan tree node.
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_SCANNING
    self.total_data_offset = total_data_offset

  def Stop(self):
    """Sets an active state to stop.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self.scan_tree_node = None
    self.state = self._SCAN_STATE_STOP
|
||||
|
||||
|
||||
class ScanTreeScannerBase(object):
  """Class that implements a scan tree-based scanner base.

  Subclasses feed consecutive data buffers to _ScanBufferScanState() and
  finish a scan with _ScanBufferScanStateFinal().
  """

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(ScanTreeScannerBase, self).__init__()
    # The scan tree is built lazily by the subclass, e.g. in StartScan().
    self._scan_tree = None
    self._specification_store = specification_store

  def _ScanBufferScanState(
      self, scan_tree_object, scan_state, data, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the scan tree.

    This function implements a Boyer-Moore-Horspool equivalent approach
    in combination with the scan tree: the scan tree matches candidate
    patterns at the current offset and the skip table advances the offset
    when no pattern matched.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Raises:
      RuntimeError: if the total data offset, total data size or the last
                    pattern offset value is out of bounds.
    """
    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size, value out of bounds.')

    if total_data_offset < 0 or (
        total_data_size is not None and total_data_offset >= total_data_size):
      raise RuntimeError(u'Invalid total data offset, value out of bounds.')

    data_offset = 0
    scan_tree_node = scan_state.scan_tree_node

    if scan_state.remaining_data:
      # Prepend the data that was carried over from the previous buffer so
      # signatures spanning the buffer boundary can still match.
      # str.join() should be more efficient than concatenation by +.
      data = ''.join([scan_state.remaining_data, data])
      data_size += scan_state.remaining_data_size

      scan_state.remaining_data = None
      scan_state.remaining_data_size = 0

    # match_on_boundary indicates this buffer reaches the end of the total
    # data, so matches near the end may no longer be deferred.
    if (total_data_size is not None and
        total_data_offset + data_size >= total_data_size):
      match_on_boundary = True
    else:
      match_on_boundary = False

    while data_offset < data_size:
      if (not match_on_boundary and
          data_offset + scan_tree_object.largest_length >= data_size):
        # Not enough data left for the largest pattern; keep the tail for
        # the next buffer (stored in remaining_data below).
        break

      found_match = False
      scan_done = False

      # Walk down the scan tree until a pattern or a leaf is reached.
      while not scan_done:
        scan_object = scan_tree_node.CompareByteValue(
            data, data_offset, data_size, total_data_offset,
            total_data_size=total_data_size)

        if isinstance(scan_object, scan_tree.ScanTreeNode):
          scan_tree_node = scan_object
        else:
          scan_done = True

      if isinstance(scan_object, patterns.Pattern):
        pattern_length = len(scan_object.signature.expression)
        data_last_offset = data_offset + pattern_length

        # NOTE: cmp() is Python 2 only; compares the full signature
        # expression against the candidate data.
        if cmp(scan_object.signature.expression,
               data[data_offset:data_last_offset]) == 0:

          # A bound signature only matches at its declared offset.
          if (not scan_object.signature.is_bound or
              scan_object.signature.offset == data_offset):
            found_match = True

            logging.debug(
                u'Signature match at data offset: 0x{0:08x}.'.format(
                    data_offset))

            scan_state.AddMatch(total_data_offset + data_offset, scan_object)

      if found_match:
        # Skip past the matched signature and restart at the tree root.
        skip_value = len(scan_object.signature.expression)
        scan_tree_node = scan_tree_object.root_node
      else:
        # No match: determine the skip value from the Horspool-style skip
        # table, scanning backwards from the last pattern position.
        last_pattern_offset = (
            scan_tree_object.skip_table.skip_pattern_length - 1)

        if data_offset + last_pattern_offset >= data_size:
          raise RuntimeError(
              u'Invalid last pattern offset, value out of bounds.')
        skip_value = 0

        while last_pattern_offset >= 0 and not skip_value:
          last_data_offset = data_offset + last_pattern_offset
          byte_value = ord(data[last_data_offset])
          skip_value = scan_tree_object.skip_table[byte_value]
          last_pattern_offset -= 1

        if not skip_value:
          # Always advance at least one byte to guarantee progress.
          skip_value = 1

        scan_tree_node = scan_tree_object.root_node

      data_offset += skip_value

    if not match_on_boundary and data_offset < data_size:
      # Stash the unscanned tail so the next buffer can prepend it.
      scan_state.remaining_data = data[data_offset:data_size]
      scan_state.remaining_data_size = data_size - data_offset

    scan_state.Scanning(scan_tree_node, total_data_offset + data_offset)

  def _ScanBufferScanStateFinal(self, scan_tree_object, scan_state):
    """Scans the remaining data in the scan state using the scan tree.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
    """
    if scan_state.remaining_data:
      data = scan_state.remaining_data
      data_size = scan_state.remaining_data_size

      scan_state.remaining_data = None
      scan_state.remaining_data_size = 0

      # Setting the total data size ensures boundary matches are returned
      # in this final scanning pass.
      total_data_size = scan_state.total_data_size
      if total_data_size is None:
        total_data_size = scan_state.total_data_offset + data_size

      self._ScanBufferScanState(
          scan_tree_object, scan_state, data, data_size,
          scan_state.total_data_offset, total_data_size=total_data_size)

    scan_state.Stop()

  def GetScanResults(self, scan_state):
    """Retrieves the scan results, grouped by specification identifier.

    Args:
      scan_state: the scan state (instance of ScanState).

    Returns:
      A list of scan results (instances of _ScanResult).
    """
    scan_results = {}

    for scan_match in scan_state.GetMatches():
      specification = scan_match.specification
      identifier = specification.identifier

      logging.debug(
          u'Scan match at offset: 0x{0:08x} specification: {1:s}'.format(
              scan_match.total_data_offset, identifier))

      if identifier not in scan_results:
        scan_results[identifier] = _ScanResult(specification)

      scan_results[identifier].scan_matches.append(scan_match)

    return scan_results.values()
|
||||
|
||||
|
||||
class Scanner(ScanTreeScannerBase):
  """Class that implements a scan tree-based scanner.

  The scanner matches signatures at any offset in the scanned data.
  """

  _READ_BUFFER_SIZE = 512

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(Scanner, self).__init__(specification_store)

  def ScanBuffer(self, scan_state, data, data_size):
    """Scans a buffer.

    Args:
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
    """
    total_data_offset = scan_state.total_data_offset
    total_data_size = scan_state.total_data_size

    self._ScanBufferScanState(
        self._scan_tree, scan_state, data, data_size, total_data_offset,
        total_data_size=total_data_size)

  def ScanFileObject(self, file_object):
    """Scans a file-like object.

    Args:
      file_object: a file-like object.

    Returns:
      A list of scan results (instances of ScanResult).
    """
    # Prefer the object's own size accessor; otherwise derive the size by
    # seeking to the end.
    if hasattr(file_object, 'get_size'):
      file_size = file_object.get_size()
    else:
      file_object.seek(0, os.SEEK_END)
      file_size = file_object.tell()

    scan_state = self.StartScan(total_data_size=file_size)

    read_offset = 0
    file_object.seek(read_offset, os.SEEK_SET)

    while read_offset < file_size:
      buffer_data = file_object.read(self._READ_BUFFER_SIZE)
      if not buffer_data:
        break

      buffer_size = len(buffer_data)
      self._ScanBufferScanState(
          self._scan_tree, scan_state, buffer_data, buffer_size, read_offset,
          total_data_size=file_size)

      read_offset += buffer_size

    self.StopScan(scan_state)

    return self.GetScanResults(scan_state)

  def StartScan(self, total_data_size=None):
    """Starts a scan.

    The scan tree is built on the first call and reused afterwards.

    Args:
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Returns:
      A scan state (instance of ScanState).

    Raises:
      RuntimeError: when total data size is invalid.
    """
    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size.')

    if self._scan_tree is None:
      self._scan_tree = scan_tree.ScanTree(
          self._specification_store, None)

    return ScanState(
        self._scan_tree.root_node, total_data_size=total_data_size)

  def StopScan(self, scan_state):
    """Stops a scan and flushes any remaining buffered data.

    Args:
      scan_state: the scan state (instance of ScanState).
    """
    self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
|
||||
|
||||
|
||||
class OffsetBoundScanner(ScanTreeScannerBase):
  """Class that implements an offset-bound scan tree-based scanner.

  Uses two scan trees: one for header (start-of-data) signatures and one
  for footer (end-of-data) signatures.
  """

  _READ_BUFFER_SIZE = 512

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(OffsetBoundScanner, self).__init__(specification_store)
    # Header/footer trees and spanning ranges are built lazily in
    # StartScan().
    self._footer_scan_tree = None
    self._footer_spanning_range = None
    self._header_scan_tree = None
    self._header_spanning_range = None

  def _GetFooterRange(self, total_data_size):
    """Retrieves the read buffer aligned footer range.

    Args:
      total_data_size: the total data size.

    Returns:
      A range (instance of Range).
    """
    # The actual footer range is in reverse since the spanning footer range
    # is based on positive offsets, where 0 is the end of file.
    if self._footer_spanning_range.end_offset < total_data_size:
      footer_range_start_offset = (
          total_data_size - self._footer_spanning_range.end_offset)
    else:
      footer_range_start_offset = 0

    # Calculate the lower bound modulus of the footer range start offset
    # in increments of the read buffer size.
    # NOTE: relies on Python 2 integer (floor) division semantics of /=.
    footer_range_start_offset /= self._READ_BUFFER_SIZE
    footer_range_start_offset *= self._READ_BUFFER_SIZE

    # Calculate the upper bound modulus of the footer range size
    # in increments of the read buffer size.
    footer_range_size = self._footer_spanning_range.size
    remainder = footer_range_size % self._READ_BUFFER_SIZE
    footer_range_size /= self._READ_BUFFER_SIZE

    if remainder > 0:
      footer_range_size += 1

    footer_range_size *= self._READ_BUFFER_SIZE

    return range_list.Range(footer_range_start_offset, footer_range_size)

  def _GetHeaderRange(self):
    """Retrieves the read buffer aligned header range.

    Returns:
      A range (instance of Range).
    """
    # Calculate the lower bound modulus of the header range start offset
    # in increments of the read buffer size.
    # NOTE: relies on Python 2 integer (floor) division semantics of /=.
    header_range_start_offset = self._header_spanning_range.start_offset
    header_range_start_offset /= self._READ_BUFFER_SIZE
    header_range_start_offset *= self._READ_BUFFER_SIZE

    # Calculate the upper bound modulus of the header range size
    # in increments of the read buffer size.
    header_range_size = self._header_spanning_range.size
    remainder = header_range_size % self._READ_BUFFER_SIZE
    header_range_size /= self._READ_BUFFER_SIZE

    if remainder > 0:
      header_range_size += 1

    header_range_size *= self._READ_BUFFER_SIZE

    return range_list.Range(header_range_start_offset, header_range_size)

  def _ScanBufferScanState(
      self, scan_tree_object, scan_state, data, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the scan tree.

    Unlike the base class implementation this variant only probes the
    offset-bound patterns; it does not slide over the buffer.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.
    """
    scan_done = False
    scan_tree_node = scan_tree_object.root_node

    # Walk down the scan tree from the root until a pattern or a leaf is
    # reached.
    while not scan_done:
      data_offset = 0

      scan_object = scan_tree_node.CompareByteValue(
          data, data_offset, data_size, total_data_offset,
          total_data_size=total_data_size)

      if isinstance(scan_object, scan_tree.ScanTreeNode):
        scan_tree_node = scan_object
      else:
        scan_done = True

    if isinstance(scan_object, patterns.Pattern):
      pattern_length = len(scan_object.signature.expression)
      # The candidate is compared at the signature's own offset.
      pattern_start_offset = scan_object.signature.offset
      pattern_end_offset = pattern_start_offset + pattern_length

      # NOTE: cmp() is Python 2 only.
      if cmp(scan_object.signature.expression,
             data[pattern_start_offset:pattern_end_offset]) == 0:
        scan_state.AddMatch(
            total_data_offset + scan_object.signature.offset, scan_object)

        logging.debug(
            u'Signature match at data offset: 0x{0:08x}.'.format(data_offset))

  # TODO: implement.
  # def ScanBuffer(self, scan_state, data, data_size):
  #   """Scans a buffer.

  #   Args:
  #     scan_state: the scan state (instance of ScanState).
  #     data: a buffer containing raw data.
  #     data_size: the size of the raw data in the buffer.
  #   """
  #   # TODO: fix footer scanning logic.
  #   # need to know the file size here for the footers.

  #   # TODO: check for clashing ranges?

  #   header_range = self._GetHeaderRange()
  #   footer_range = self._GetFooterRange(scan_state.total_data_size)

  #   if self._scan_tree == self._header_scan_tree:
  #     if (scan_state.total_data_offset >= header_range.start_offset and
  #         scan_state.total_data_offset < header_range.end_offset):
  #       self._ScanBufferScanState(
  #           self._scan_tree, scan_state, data, data_size,
  #           scan_state.total_data_offset,
  #           total_data_size=scan_state.total_data_size)

  #     elif scan_state.total_data_offset > header_range.end_offset:
  #       # TODO: implement.
  #       pass

  #   if self._scan_tree == self._footer_scan_tree:
  #     if (scan_state.total_data_offset >= footer_range.start_offset and
  #         scan_state.total_data_offset < footer_range.end_offset):
  #       self._ScanBufferScanState(
  #           self._scan_tree, scan_state, data, data_size,
  #           scan_state.total_data_offset,
  #           total_data_size=scan_state.total_data_size)

  def ScanFileObject(self, file_object):
    """Scans a file-like object.

    Scans the header range first and then, if footer signatures exist,
    the footer range.

    Args:
      file_object: a file-like object.

    Returns:
      A list of scan results (instances of _ScanResult).
    """
    # TODO: add support for fixed size block-based reads.

    # Prefer the object's own size accessor; otherwise derive the size by
    # seeking to the end.
    if hasattr(file_object, 'get_size'):
      file_size = file_object.get_size()
    else:
      file_object.seek(0, os.SEEK_END)
      file_size = file_object.tell()

    file_offset = 0
    scan_state = self.StartScan(total_data_size=file_size)

    if self._header_scan_tree.root_node is not None:
      header_range = self._GetHeaderRange()

      # TODO: optimize the read by supporting fixed size block-based reads.
      # if file_offset < header_range.start_offset:
      #   file_offset = header_range.start_offset

      file_object.seek(file_offset, os.SEEK_SET)

      # TODO: optimize the read by supporting fixed size block-based reads.
      # data = file_object.read(header_range.size)
      data = file_object.read(header_range.end_offset)
      data_size = len(data)

      if data_size > 0:
        self._ScanBufferScanState(
            self._scan_tree, scan_state, data, data_size, file_offset,
            total_data_size=file_size)

      file_offset += data_size

      # Switch to the footer scan tree once the header scan is done.
      if self._footer_scan_tree.root_node is not None:
        self.StopScan(scan_state)

        self._scan_tree = self._footer_scan_tree
        scan_state.Reset(self._scan_tree.root_node)

    if self._footer_scan_tree.root_node is not None:
      footer_range = self._GetFooterRange(file_size)

      # Note that the offset in the footer scan tree start with 0. Make sure
      # the data offset of the data being scanned is aligned with the offset
      # in the scan tree.
      if footer_range.start_offset < self._footer_spanning_range.end_offset:
        data_offset = (
            self._footer_spanning_range.end_offset - footer_range.start_offset)
      else:
        data_offset = 0

      if file_offset < footer_range.start_offset:
        file_offset = footer_range.start_offset

      file_object.seek(file_offset, os.SEEK_SET)

      data = file_object.read(self._READ_BUFFER_SIZE)
      data_size = len(data)

      if data_size > 0:
        self._ScanBufferScanState(
            self._scan_tree, scan_state, data[data_offset:],
            data_size - data_offset, file_offset + data_offset,
            total_data_size=file_size)

    self.StopScan(scan_state)

    return self.GetScanResults(scan_state)

  def StartScan(self, total_data_size=None):
    """Starts a scan.

    The function sets up the scanning related structures if necessary.

    Args:
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Returns:
      A scan state (instance of ScanState).

    Raises:
      RuntimeError: when total data size is invalid.
    """
    # Note: unlike Scanner.StartScan() a total data size is required here
    # since footer offsets are relative to the end of the data.
    if total_data_size is None or total_data_size < 0:
      raise RuntimeError(u'Invalid total data size.')

    if self._header_scan_tree is None:
      self._header_scan_tree = scan_tree.ScanTree(
          self._specification_store, True,
          offset_mode=scan_tree.ScanTree.OFFSET_MODE_POSITIVE)

    if self._header_spanning_range is None:
      spanning_range = self._header_scan_tree.range_list.GetSpanningRange()
      self._header_spanning_range = spanning_range

    if self._footer_scan_tree is None:
      self._footer_scan_tree = scan_tree.ScanTree(
          self._specification_store, True,
          offset_mode=scan_tree.ScanTree.OFFSET_MODE_NEGATIVE)

    if self._footer_spanning_range is None:
      spanning_range = self._footer_scan_tree.range_list.GetSpanningRange()
      self._footer_spanning_range = spanning_range

    # Start with the header scan tree; fall back to the footer scan tree
    # when there are no header signatures.
    if self._header_scan_tree.root_node is not None:
      self._scan_tree = self._header_scan_tree
    elif self._footer_scan_tree.root_node is not None:
      self._scan_tree = self._footer_scan_tree
    else:
      self._scan_tree = None

    if self._scan_tree is not None:
      root_node = self._scan_tree.root_node
    else:
      root_node = None

    return ScanState(root_node, total_data_size=total_data_size)

  def StopScan(self, scan_state):
    """Stops a scan and flushes any remaining buffered data.

    Args:
      scan_state: the scan state (instance of ScanState).
    """
    self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
    # Clear the active scan tree so a subsequent scan selects it anew.
    self._scan_tree = None
|
||||
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains tests for the format scanner classes."""
|
||||
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import scanner
|
||||
from plaso.classifier import test_lib
|
||||
|
||||
|
||||
class ScannerTest(unittest.TestCase):
  """Class to test the scanner."""

  def _GetNumberOfScanMatches(
      self, test_scanner, data_buffers, total_data_size=None):
    """Scans the data buffers in sequence and returns the match count.

    Args:
      test_scanner: the scanner (instance of Scanner) under test.
      data_buffers: a list of data buffers to scan in sequence.
      total_data_size: optional value to indicate the total data size.
                       The default is None (unknown size).

    Returns:
      The number of scan matches.
    """
    scan_state = test_scanner.StartScan(total_data_size=total_data_size)
    for data in data_buffers:
      test_scanner.ScanBuffer(scan_state, data, len(data))
    test_scanner.StopScan(scan_state)
    return len(scan_state.GetMatches())

  def testInitialize(self):
    """Function to test the initialize function."""
    store = test_lib.CreateSpecificationStore()

    # Signature for LNK
    data1 = ('\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
             '\x00\x00\x00\x46')

    # Signature for REGF
    data2 = 'regf'

    # Random data
    data3 = '\x01\xfa\xe0\xbe\x99\x8e\xdb\x70\xea\xcc\x6b\xae\x2f\xf5\xa2\xe4'

    # Boundary scan test
    data4a = ('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
              '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00PK')
    data4b = ('\x07\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
              '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Z')

    # Large buffer test
    data5_size = 1024 * 1024
    data5 = '\x00' * (data5_size - 4)
    data5 += 'PK\x07\x08'

    test_scanner = scanner.Scanner(store)

    # A buffer containing a signature should yield a single match, both
    # with and without a known total data size.
    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data1], total_data_size=len(data1)), 1)
    self.assertEqual(self._GetNumberOfScanMatches(test_scanner, [data1]), 1)

    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data2], total_data_size=len(data2)), 1)
    self.assertEqual(self._GetNumberOfScanMatches(test_scanner, [data2]), 1)

    # Random data should yield no matches.
    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data3], total_data_size=len(data3)), 0)
    self.assertEqual(self._GetNumberOfScanMatches(test_scanner, [data3]), 0)

    # A signature split across a buffer boundary should still match.
    total_data_size = len(data4a) + len(data4b)
    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data4a, data4b],
            total_data_size=total_data_size), 1)
    self.assertEqual(
        self._GetNumberOfScanMatches(test_scanner, [data4a, data4b]), 1)

    # A signature at the very end of a large buffer should match.
    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data5], total_data_size=len(data5)), 1)
|
||||
|
||||
|
||||
# Run the format scanner tests when this module is invoked directly.
if __name__ == '__main__':
  unittest.main()
|
||||
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""The format specification classes."""
|
||||
|
||||
|
||||
class Signature(object):
  """Class that defines a signature of a format specification.

  The signature consists of a byte string expression, an optional
  offset relative to the start of the data, and a value to indicate
  if the expression is bound to the offset.
  """
  def __init__(self, expression, offset=None, is_bound=False):
    """Initializes the signature.

    Args:
      expression: string containing the expression of the signature.
                  The expression consists of a byte string at the moment
                  regular expression (regexp) are not supported.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative from the start of the data a negative offset
              is relative from the end of the data.
      is_bound: boolean value to indicate the signature must be bound to
                the offset or False by default.
    """
    # The attributes are read directly by the scanner; no accessors needed.
    self.expression = expression
    self.offset = offset
    self.is_bound = is_bound
|
||||
|
||||
|
||||
class Specification(object):
  """Class that contains a format specification."""

  def __init__(self, identifier):
    """Initializes the specification.

    Args:
      identifier: string containing a unique name for the format.
    """
    self.identifier = identifier
    # Optional metadata and signatures describing the format.
    self.mime_types = []
    self.signatures = []
    self.universal_type_identifiers = []

  def AddMimeType(self, mime_type):
    """Adds a MIME type."""
    self.mime_types.append(mime_type)

  def AddNewSignature(self, expression, offset=None, is_bound=False):
    """Adds a signature.

    Args:
      expression: string containing the expression of the signature.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative from the start of the data a negative offset
              is relative from the end of the data.
      is_bound: boolean value to indicate the signature must be bound to
                the offset or False by default.
    """
    signature = Signature(expression, offset=offset, is_bound=is_bound)
    self.signatures.append(signature)

  def AddUniversalTypeIdentifier(self, universal_type_identifiers):
    """Adds a Universal Type Identifier (UTI)."""
    self.universal_type_identifiers.append(universal_type_identifiers)
|
||||
|
||||
|
||||
class SpecificationStore(object):
  """Class that serves as a store for specifications.

  Specifications are keyed by their format identifier; identifiers
  must be unique within the store.
  """

  def __init__(self):
    """Initializes the specification store."""
    # Maps format identifier -> Specification.
    self._format_specifications = {}

  @property
  def specifications(self):
    """A specifications iterator object."""
    # iter(dict.values()) instead of dict.itervalues() so the property
    # returns an iterator on both Python 2 and Python 3.
    return iter(self._format_specifications.values())

  def AddNewSpecification(self, identifier):
    """Adds a new specification.

    Args:
      identifier: a string containing the format identifier,
                  which should be unique for the store.

    Returns:
      an instance of Specification.

    Raises:
      ValueError: if the store already contains a specification with
                  the same identifier.
    """
    if identifier in self._format_specifications:
      raise ValueError("specification {0:s} is already defined in "
                       "store.".format(identifier))

    self._format_specifications[identifier] = Specification(identifier)

    return self._format_specifications[identifier]

  def AddSpecification(self, specification):
    """Adds a specification.

    Args:
      specification: the specification (instance of Specification).

    Raises:
      KeyError: if the store already contains a specification with
                the same identifier.
    """
    if specification.identifier in self._format_specifications:
      raise KeyError(
          u'Specification {0:s} is already defined in store.'.format(
              specification.identifier))

    self._format_specifications[specification.identifier] = specification

  def ReadFromFileObject(self, unused_file_object):
    """Reads the specification store from a file-like object.

    Args:
      unused_file_object: A file-like object.

    Raises:
      RuntimeError: because functionality is not implemented yet.
    """
    # TODO: implement this function.
    raise RuntimeError(u'Function not implemented.')

  def ReadFromFile(self, filename):
    """Reads the specification store from a file.

    Args:
      filename: The name of the file.
    """
    # A context manager guarantees the file is closed even when
    # ReadFromFileObject raises.
    with open(filename, 'r') as file_object:
      self.ReadFromFileObject(file_object)
|
||||
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for the format specification classes."""
|
||||
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import specification
|
||||
|
||||
|
||||
class SpecificationStoreTest(unittest.TestCase):
  """Class to test the specification store."""

  def testAddSpecification(self):
    """Function to test the add specification function."""
    store = specification.SpecificationStore()

    regf_specification = specification.Specification('REGF')
    regf_specification.AddNewSignature('regf', offset=0)
    store.AddSpecification(regf_specification)

    esedb_specification = specification.Specification('ESEDB')
    esedb_specification.AddNewSignature('\xef\xcd\xab\x89', offset=4)
    store.AddSpecification(esedb_specification)

    # Adding a specification with a duplicate identifier must fail.
    with self.assertRaises(KeyError):
      store.AddSpecification(regf_specification)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Run this module's test cases with the default unittest runner.
  unittest.main()
|
||||
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Shared test cases."""
|
||||
|
||||
from plaso.classifier import specification
|
||||
|
||||
|
||||
def CreateSpecificationStore():
  """Creates a format specification store for testing purposes.

  Returns:
    A format specification store (instance of SpecificationStore).
  """
  store = specification.SpecificationStore()

  test_specification = store.AddNewSpecification('7zip')
  test_specification.AddMimeType('application/x-7z-compressed')
  test_specification.AddUniversalTypeIdentifier('org.7-zip.7-zip-archive')
  test_specification.AddNewSignature('7z\xbc\xaf\x27\x1c', offset=0)

  test_specification = store.AddNewSpecification('esedb')
  test_specification.AddNewSignature(
      '\xef\xcd\xab\x89', offset=4, is_bound=True)

  test_specification = store.AddNewSpecification('evt')
  test_specification.AddNewSignature(
      '\x30\x00\x00\x00LfLe\x01\x00\x00\x00\x01\x00\x00\x00', offset=0,
      is_bound=True)

  test_specification = store.AddNewSpecification('evtx')
  test_specification.AddNewSignature('ElfFile\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('ewf')
  test_specification.AddNewSignature(
      'EVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)

  # Bug fix: previously this specification was created as a stand-alone
  # specification.Specification('ewf_logical') and never added to the store,
  # so it was silently dropped; register it like the other formats.
  test_specification = store.AddNewSpecification('ewf_logical')
  test_specification.AddNewSignature(
      'LVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('lnk')
  test_specification.AddNewSignature(
      '\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
      '\x00\x00\x00\x46', offset=0)

  test_specification = store.AddNewSpecification('msiecf_index_dat')
  test_specification.AddNewSignature(
      'Client UrlCache MMF Ver ', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('nk2')
  test_specification.AddNewSignature(
      '\x0d\xf0\xad\xba\xa0\x00\x00\x00\x01\x00\x00\x00', offset=0,
      is_bound=True)

  test_specification = store.AddNewSpecification('olecf')
  test_specification.AddNewSignature(
      '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1', offset=0, is_bound=True)
  test_specification.AddNewSignature(
      '\x0e\x11\xfc\x0d\xd0\xcf\x11\x0e', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('pff')
  test_specification.AddNewSignature('!BDN', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('qcow')
  test_specification.AddNewSignature('QFI\xfb', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('rar')
  test_specification.AddMimeType('application/x-rar-compressed')
  test_specification.AddUniversalTypeIdentifier('com.rarlab.rar-archive')
  test_specification.AddNewSignature(
      'Rar!\x1a\x07\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('regf')
  test_specification.AddNewSignature('regf', offset=0, is_bound=True)

  # NOTE(review): 'thumbache' looks like a typo for 'thumbcache', but the
  # identifiers are kept as-is since callers may depend on them — confirm
  # before renaming.
  test_specification = store.AddNewSpecification('thumbache_db_cache')
  test_specification.AddNewSignature('CMMM', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('thumbache_db_index')
  test_specification.AddNewSignature('IMMM', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('zip')
  test_specification.AddMimeType('application/zip')
  test_specification.AddUniversalTypeIdentifier('com.pkware.zip-archive')
  # WinZip 8 signature.
  test_specification.AddNewSignature('PK00', offset=0, is_bound=True)
  test_specification.AddNewSignature('PK\x01\x02')
  test_specification.AddNewSignature('PK\x03\x04', offset=0)
  test_specification.AddNewSignature('PK\x05\x05')
  # Will be at offset 0 when the archive is empty.
  test_specification.AddNewSignature('PK\x05\x06', offset=-22, is_bound=True)
  test_specification.AddNewSignature('PK\x06\x06')
  test_specification.AddNewSignature('PK\x06\x07')
  test_specification.AddNewSignature('PK\x06\x08')
  # Will be at offset 0 when this is spanned archive.
  test_specification.AddNewSignature('PK\x07\x08')

  return store
|
||||
Reference in New Issue
Block a user