Import from old repository
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains the format classifier classes.
|
||||
|
||||
Plaso is a tool that extracts events from files on a file system.
|
||||
For this it either reads files from a mounted file system or from an image.
|
||||
It uses an exhaustive approach to determine parse events from a file, meaning
|
||||
that it passes the file first to parser A and if that fails it continues with
|
||||
parser B.
|
||||
|
||||
The classifier is designed to be able to more quickly determine the format of
|
||||
a file and limit the number of parsers part of the exhaustive approach.
|
||||
|
||||
The current version of the classifier uses signatures to identify file formats.
|
||||
Some signatures must always be defined at a specific offset, this is referred to
|
||||
as an offset-bound signature or bound for short. Other signatures are commonly
|
||||
found at a specific offset but not necessarily. The last form of signatures is
|
||||
unbound, meaning that they don't have a fixed or common location where they can
|
||||
be found.
|
||||
|
||||
A specification is a collection of signatures with additional metadata that
|
||||
defines a specific file format. These specifications are grouped into a store
|
||||
for ease of use, e.g. so that they can be read from a configuration file all
|
||||
at once.
|
||||
|
||||
The classifier requires a scanner to analyze the data in a file. The scanner
|
||||
uses the specifications in a store to scan for the signatures or a certain
|
||||
format.
|
||||
|
||||
The classifier allows for multiple methods of scanning a file:
|
||||
* full: the entire file is scanned. This is the default scanning method.
|
||||
* head-tail: only the beginning (head) and the end (tail) of the file is
|
||||
scanned. This approach is more efficient for larger files.
|
||||
The buffer size is used as the size of the data that is scanned.
|
||||
Smaller files are scanned entirely.
|
||||
|
||||
The classifier returns zero or more classifications which point to a format
|
||||
specification and the scan results for the signatures defined by
|
||||
the specification.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
class Classification(object):
  """A single format classification.

  A classification couples a format specification with the scan matches
  that were found for that specification.
  """

  def __init__(self, specification, scan_matches):
    """Initializes the classification.

    Args:
      specification: the format specification (instance of Specification).
      scan_matches: the list of scan matches (instances of _ScanMatch).
    """
    self._specification = specification
    self.scan_matches = scan_matches

  @property
  def identifier(self):
    """The identifier of the classified format."""
    specification = self._specification
    return specification.identifier

  @property
  def magic_types(self):
    """The magic types, or an empty list if there are none."""
    specification = self._specification
    return specification.magic_types

  @property
  def mime_types(self):
    """The MIME types, or an empty list if there are none."""
    specification = self._specification
    return specification.mime_types
|
||||
|
||||
|
||||
class Classifier(object):
  """Classifies formats in raw data.

  The classifier is initialized with one scanner, which performs the
  actual scanning of the data. Keeping the scanner separate allows it to
  be replaced by a more efficient alternative when necessary. After
  initialization the classifier can classify data in buffers, files or
  file-like objects.

  For an example of how the classifier is to be used see: classify.py.
  """

  # Default scan buffer size in bytes.
  BUFFER_SIZE = 16 * 1024 * 1024

  def __init__(self, scanner):
    """Initializes the classifier.

    Args:
      scanner: an instance of the signature scanner.
    """
    self._scanner = scanner

  def _GetClassifications(self, scan_results):
    """Builds classifications from the scan results.

    Multiple scan results for the same specification are combined into a
    single classification.

    Args:
      scan_results: a list containing instances of _ScanResult.

    Returns:
      a list of instances of Classification.
    """
    classifications = {}

    for result in scan_results:
      for match in result.scan_matches:
        logging.debug(
            u'scan match at offset: 0x{0:08x} specification: {1:s}'.format(
                match.total_data_offset, result.identifier))

      # Only the first scan result per identifier creates a classification.
      if result.identifier not in classifications:
        classifications[result.identifier] = Classification(
            result.specification, result.scan_matches)

    return classifications.values()

  def ClassifyBuffer(self, data, data_size):
    """Classifies the data in a buffer.

    Assumes all necessary data is available in the buffer.

    Args:
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.

    Returns:
      a list of classifications or an empty list.
    """
    state = self._scanner.StartScan()
    self._scanner.ScanBuffer(state, data, data_size)
    self._scanner.StopScan(state)

    return self._GetClassifications(state.GetResults())

  def ClassifyFileObject(self, file_object):
    """Classifies the data in a file-like object.

    Args:
      file_object: a file-like object.

    Returns:
      a list of classifier classifications or an empty list.
    """
    results = self._scanner.ScanFileObject(file_object)
    return self._GetClassifications(results)

  def ClassifyFile(self, filename):
    """Classifies the data in a file.

    Args:
      filename: the name of the file.

    Returns:
      a list of classifier classifications or an empty list.
    """
    with open(filename, 'rb') as file_object:
      return self.ClassifyFileObject(file_object)
|
||||
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains tests for the format classifier classes."""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import classifier
|
||||
from plaso.classifier import scanner
|
||||
from plaso.classifier import test_lib
|
||||
|
||||
|
||||
class ClassifierTest(unittest.TestCase):
  """Tests for the Classifier class."""

  def setUp(self):
    """Sets up the specification store and the test file paths."""
    self._store = test_lib.CreateSpecificationStore()

    self._test_file1 = os.path.join('test_data', 'NTUSER.DAT')
    self._test_file2 = os.path.join('test_data', 'syslog.zip')

  def testClassifyFileWithScanner(self):
    """Tests ClassifyFile using the scan tree-based scanner."""
    test_scanner = scanner.Scanner(self._store)

    for test_file in [self._test_file1, self._test_file2]:
      test_classifier = classifier.Classifier(test_scanner)
      classifications = test_classifier.ClassifyFile(test_file)
      self.assertEqual(len(classifications), 1)

      # TODO: assert the contents of the classification.

  def testClassifyFileWithOffsetBoundScanner(self):
    """Tests ClassifyFile using the offset-bound scanner."""
    test_scanner = scanner.OffsetBoundScanner(self._store)

    for test_file in [self._test_file1, self._test_file2]:
      test_classifier = classifier.Classifier(test_scanner)
      classifications = test_classifier.ClassifyFile(test_file)
      self.assertEqual(len(classifications), 1)

      # TODO: assert the contents of the classification.
|
||||
|
||||
|
||||
# Run the tests when the module is invoked directly.
if __name__ == "__main__":
  unittest.main()
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains a small classify test program."""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import logging
|
||||
|
||||
from plaso.classifier import classifier
|
||||
from plaso.classifier import scanner
|
||||
from plaso.classifier import test_lib
|
||||
|
||||
|
||||
def Main():
  """Entry point of the classify test program.

  Parses the command line arguments, builds a scanner from the test
  specification store and prints the classifications found for each
  input file.
  """
  args_parser = argparse.ArgumentParser(
      description='Classify test program.')

  # Note: argparse requires a callable for the type argument; the previous
  # type='choice' was optparse syntax and made argument parsing fail.
  # Restricting the accepted values is done by choices alone.
  args_parser.add_argument(
      '-t', '--type', metavar='TYPE', action='store',
      dest='scanner_type', choices=['scan-tree', 'scan_tree'],
      default='scan-tree', help='The scanner type')

  args_parser.add_argument(
      '-v', '--verbose', action='store_true', dest='verbose', default=False,
      help='Print verbose output')

  args_parser.add_argument(
      'filenames', nargs='+', action='store', metavar='FILENAMES',
      default=None, help='The input filename(s) to classify.')

  options = args_parser.parse_args()

  if options.verbose:
    logging.basicConfig(level=logging.DEBUG)

  # Expand glob patterns into the actual list of files to classify.
  files_to_classify = []
  for input_glob in options.filenames:
    files_to_classify += glob.glob(input_glob)

  store = test_lib.CreateSpecificationStore()

  if options.scanner_type not in ['scan-tree', 'scan_tree']:
    # Defensive check; argparse already enforces choices above.
    print(u'Unsupported scanner type defaulting to: scan-tree')

  scan = scanner.Scanner(store)
  classify = classifier.Classifier(scan)

  for input_filename in files_to_classify:
    classifications = classify.ClassifyFile(input_filename)

    print(u'File: {0:s}'.format(input_filename))
    if not classifications:
      print(u'No classifications found.')
    else:
      print(u'Classifications:')
      for classification in classifications:
        print(u'\tformat: {0:s}'.format(classification.identifier))

    print(u'')
|
||||
|
||||
|
||||
# Script entry point.
if __name__ == '__main__':
  Main()
|
||||
@@ -0,0 +1,308 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""The patterns classes used by the scan tree-based format scanner."""
|
||||
|
||||
|
||||
class _ByteValuePatterns(object):
|
||||
"""Class that implements a mapping between byte value and patterns.
|
||||
|
||||
The byte value patterns are used in the scan tree-based format scanner
|
||||
to map a byte value to one or more patterns.
|
||||
"""
|
||||
|
||||
def __init__(self, byte_value):
|
||||
"""Initializes the pattern table (entry) byte value.
|
||||
|
||||
Args:
|
||||
byte_value: the byte value that maps the patterns in the table.
|
||||
"""
|
||||
super(_ByteValuePatterns, self).__init__()
|
||||
self.byte_value = byte_value
|
||||
self.patterns = {}
|
||||
|
||||
def __unicode__(self):
|
||||
"""Retrieves a string representation of the byte value patterns."""
|
||||
return u'0x{0:02x} {1!s}'.format(ord(self.byte_value), self.patterns)
|
||||
|
||||
def AddPattern(self, pattern):
|
||||
"""Adds a pattern.
|
||||
|
||||
Args:
|
||||
pattern: the pattern (instance of Pattern).
|
||||
|
||||
Raises:
|
||||
ValueError: if the table entry already contains a pattern
|
||||
with the same identifier.
|
||||
"""
|
||||
if pattern.identifier in self.patterns:
|
||||
raise ValueError(u'Pattern {0:s} is already defined.'.format(
|
||||
pattern.identifier))
|
||||
|
||||
self.patterns[pattern.identifier] = pattern
|
||||
|
||||
def ToDebugString(self, indentation_level=1):
|
||||
"""Converts the byte value pattern into a debug string."""
|
||||
indentation = u' ' * indentation_level
|
||||
|
||||
header = u'{0:s}byte value: 0x{1:02x}\n'.format(
|
||||
indentation, ord(self.byte_value))
|
||||
|
||||
entries = u''.join([u'{0:s} patterns: {1:s}\n'.format(
|
||||
indentation, identifier) for identifier in self.patterns])
|
||||
|
||||
return u''.join([header, entries, u'\n'])
|
||||
|
||||
|
||||
class _SkipTable(object):
|
||||
"""Class that implements a skip table.
|
||||
|
||||
The skip table is used in the scan tree-based format scanner to determine
|
||||
the skip value for the Boyer–Moore–Horspool search.
|
||||
"""
|
||||
|
||||
def __init__(self, skip_pattern_length):
|
||||
"""Initializes the skip table.
|
||||
|
||||
Args:
|
||||
skip_pattern_length: the (maximum) skip pattern length.
|
||||
"""
|
||||
super(_SkipTable, self).__init__()
|
||||
self._skip_value_per_byte_value = {}
|
||||
self.skip_pattern_length = skip_pattern_length
|
||||
|
||||
def __getitem__(self, key):
|
||||
"""Retrieves a specific skip value.
|
||||
|
||||
Args:
|
||||
key: the byte value within the skip table.
|
||||
|
||||
Returns:
|
||||
the skip value for the key or the maximim skip value
|
||||
if no corresponding key was found.
|
||||
"""
|
||||
if key in self._skip_value_per_byte_value:
|
||||
return self._skip_value_per_byte_value[key]
|
||||
return self.skip_pattern_length
|
||||
|
||||
def SetSkipValue(self, byte_value, skip_value):
|
||||
"""Sets a skip value.
|
||||
|
||||
Args:
|
||||
byte_value: the corresponding byte value.
|
||||
skip_value: the number of bytes to skip.
|
||||
|
||||
Raises:
|
||||
ValueError: if byte value or skip value is out of bounds.
|
||||
"""
|
||||
if byte_value < 0 or byte_value > 255:
|
||||
raise ValueError(u'Invalid byte value, value out of bounds.')
|
||||
|
||||
if skip_value < 0 or skip_value >= self.skip_pattern_length:
|
||||
raise ValueError(u'Invalid skip value, value out of bounds.')
|
||||
|
||||
if (not byte_value in self._skip_value_per_byte_value or
|
||||
self._skip_value_per_byte_value[byte_value] > skip_value):
|
||||
self._skip_value_per_byte_value[byte_value] = skip_value
|
||||
|
||||
def ToDebugString(self):
|
||||
"""Converts the skip table into a debug string."""
|
||||
header = u'Byte value\tSkip value\n'
|
||||
|
||||
entries = u''.join([u'0x{0:02x}\t{1:d}\n'.format(
|
||||
byte_value, self._skip_value_per_byte_value[byte_value])
|
||||
for byte_value in self._skip_value_per_byte_value])
|
||||
|
||||
default = u'Default\t{0:d}\n'.format(self.skip_pattern_length)
|
||||
|
||||
return u''.join([header, entries, default, u'\n'])
|
||||
|
||||
|
||||
class Pattern(object):
  """A signature pattern within a format specification."""

  def __init__(self, signature_index, signature, specification):
    """Initializes the pattern.

    Args:
      signature_index: the index of the signature within the specification.
      signature: the signature (instance of Signature).
      specification: the specification (instance of Specification) that
                     contains the signature.
    """
    super(Pattern, self).__init__()
    self._signature_index = signature_index
    self.signature = signature
    self.specification = specification

  def __unicode__(self):
    """Returns a string representation of the pattern."""
    return self.identifier

  @property
  def expression(self):
    """The signature expression."""
    return self.signature.expression

  @property
  def identifier(self):
    """The pattern identifier.

    The identifier joins the specification identifier and the signature
    index with an underscore, since some scanner implementations are
    limited in what characters can be used in identifiers.
    """
    return u'{0:s}_{1:d}'.format(
        self.specification.identifier, self._signature_index)

  @property
  def is_bound(self):
    """Boolean value to indicate the signature is bound to an offset."""
    return self.signature.is_bound

  @property
  def offset(self):
    """The signature offset."""
    return self.signature.offset
|
||||
|
||||
|
||||
class PatternTable(object):
  """A table of patterns used to construct a scan tree.

  The pattern table is used by the scan tree-based format scanner to
  construct a scan tree. It contains either unbound patterns or patterns
  bound to a specific offset, together with the byte values that occur
  at every pattern offset.
  """

  def __init__(self, patterns, ignore_list, is_bound=None):
    """Initializes and builds the pattern table from patterns.

    Args:
      patterns: a list of the patterns.
      ignore_list: a list of pattern offsets to ignore.
      is_bound: optional boolean value to indicate if the signatures are
                bound to offsets. The default is None, which means the value
                should be ignored and both bound and unbound patterns are
                considered unbound.

    Raises:
      ValueError: if a signature pattern is too small to be useful (< 4).
    """
    super(PatternTable, self).__init__()
    self._byte_values_per_offset = {}
    self.largest_pattern_length = 0
    self.largest_pattern_offset = 0
    self.patterns = []
    self.smallest_pattern_length = 0
    self.smallest_pattern_offset = 0

    for pattern in patterns:
      if is_bound is not None and pattern.signature.is_bound != is_bound:
        continue

      pattern_length = len(pattern.expression)

      if pattern_length < 4:
        raise ValueError(u'Pattern too small to be useful.')

      # The smallest pattern length cannot be tracked with min() against
      # the initial value of 0, since 0 is smaller than any actual pattern
      # length (>= 4) and the value would never change.
      if (self.smallest_pattern_length == 0 or
          pattern_length < self.smallest_pattern_length):
        self.smallest_pattern_length = pattern_length
      self.largest_pattern_length = max(
          self.largest_pattern_length, pattern_length)

      self.patterns.append(pattern)

      self._AddPattern(pattern, ignore_list, is_bound)

  def _AddPattern(self, pattern, ignore_list, is_bound):
    """Adds the byte values per offset in the pattern to the table.

    Args:
      pattern: the pattern (instance of Pattern).
      ignore_list: a list of pattern offsets to ignore.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are
                considered unbound.
    """
    pattern_offset = pattern.offset if is_bound else 0

    # The first pattern initializes both boundary offsets; a min() against
    # the initial value of 0 would otherwise keep the smallest pattern
    # offset stuck at 0 for bound patterns with non-zero offsets.
    if len(self.patterns) == 1:
      self.smallest_pattern_offset = pattern_offset
      self.largest_pattern_offset = pattern_offset
    else:
      self.smallest_pattern_offset = min(
          self.smallest_pattern_offset, pattern_offset)
      self.largest_pattern_offset = max(
          self.largest_pattern_offset, pattern_offset)

    for byte_value in pattern.expression:
      if pattern_offset not in self._byte_values_per_offset:
        self._byte_values_per_offset[pattern_offset] = {}

      if pattern_offset not in ignore_list:
        byte_values = self._byte_values_per_offset[pattern_offset]

        if byte_value not in byte_values:
          byte_values[byte_value] = _ByteValuePatterns(byte_value)

        byte_value_patterns = byte_values[byte_value]

        byte_value_patterns.AddPattern(pattern)

      pattern_offset += 1

  @property
  def offsets(self):
    """The pattern offsets."""
    return self._byte_values_per_offset.keys()

  def GetByteValues(self, pattern_offset):
    """Returns the byte values for a specific pattern offset."""
    return self._byte_values_per_offset[pattern_offset]

  def GetSkipTable(self):
    """Retrieves the skip table for the patterns in the table.

    Returns:
      The skip table (instance of _SkipTable).
    """
    skip_table = _SkipTable(self.smallest_pattern_length)

    for pattern in self.patterns:
      if pattern.expression:
        skip_value = self.smallest_pattern_length

        for expression_index in range(0, self.smallest_pattern_length):
          skip_value -= 1
          skip_table.SetSkipValue(
              ord(pattern.expression[expression_index]), skip_value)

    return skip_table

  def ToDebugString(self):
    """Converts the pattern table into a debug string."""
    header = u'Pattern offset\tByte value(s)\n'
    entries = u''

    for pattern_offset in self._byte_values_per_offset:
      entries += u'{0:d}'.format(pattern_offset)

      byte_values = self._byte_values_per_offset[pattern_offset]

      for byte_value in byte_values:
        identifiers = u', '.join(
            [identifier for identifier in byte_values[byte_value].patterns])

        entries += u'\t0x{0:02x} ({1:s})'.format(ord(byte_value), identifiers)

      entries += u'\n'

    return u''.join([header, entries, u'\n'])
|
||||
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""The range list data type."""
|
||||
|
||||
|
||||
class Range(object):
  """A contiguous range defined by a start offset and a size."""

  def __init__(self, range_offset, range_size):
    """Initializes the range object.

    Args:
      range_offset: the range offset.
      range_size: the range size.

    Raises:
      ValueError: if the range offset or range size is negative.
    """
    if range_offset < 0:
      raise ValueError(u'Invalid range offset value.')

    if range_size < 0:
      raise ValueError(u'Invalid range size value.')

    super(Range, self).__init__()
    self.start_offset = range_offset
    self.size = range_size
    # The end offset is exclusive: start + size.
    self.end_offset = self.start_offset + self.size
|
||||
|
||||
|
||||
class RangeList(object):
  """A list of ranges kept ordered by start offset.

  Overlapping ranges are merged on insertion, so the list holds
  non-overlapping ranges.
  """

  def __init__(self):
    """Initializes the range list object."""
    super(RangeList, self).__init__()
    # The list of Range objects, kept sorted by start offset.
    self.ranges = []

  @property
  def number_of_ranges(self):
    """The number of ranges in the list."""
    return len(self.ranges)

  def GetSpanningRange(self):
    """Retrieves the range spanning the entire range list.

    Returns:
      A range (instance of Range) from the start of the first range to the
      end of the last range, or None if the list is empty.
    """
    if self.number_of_ranges == 0:
      return

    first_range = self.ranges[0]
    last_range = self.ranges[-1]
    range_size = last_range.end_offset - first_range.start_offset

    return Range(first_range.start_offset, range_size)

  def Insert(self, range_offset, range_size):
    """Inserts the range defined by the offset and size in the list.

    Note that overlapping ranges will be merged and that a range fully
    contained in an existing range is ignored.

    Args:
      range_offset: the range offset.
      range_size: the range size.

    Raises:
      RuntimeError: if the range cannot be inserted because both an insert
                    and a merge position were determined.
      ValueError: if the range offset or range size is not valid.
    """
    if range_offset < 0:
      raise ValueError(u'Invalid range offset value.')

    if range_size < 0:
      raise ValueError(u'Invalid range size value.')

    # Exactly one of these is set by the scan below: the position to insert
    # a new range at, or the index of an existing range to merge into.
    insert_index = None
    merge_index = None

    number_of_range_objects = len(self.ranges)

    range_end_offset = range_offset + range_size

    if number_of_range_objects == 0:
      insert_index = 0

    else:
      range_object_index = 0

      # Scan the sorted ranges for the first one that determines what to do
      # with the new range; each branch below ends the scan with break.
      for range_object in self.ranges:
        # Ignore negative ranges.
        if range_object.start_offset < 0:
          range_object_index += 1
          continue

        # Insert the range before an existing one.
        if range_end_offset < range_object.start_offset:
          insert_index = range_object_index
          break

        # Ignore the range since the existing one overlaps it.
        if (range_offset >= range_object.start_offset and
            range_end_offset <= range_object.end_offset):
          break

        # Merge the range since it overlaps the existing one at the end.
        if (range_offset >= range_object.start_offset and
            range_offset <= range_object.end_offset):
          merge_index = range_object_index
          break

        # Merge the range since it overlaps the existing one at the start.
        if (range_end_offset >= range_object.start_offset and
            range_end_offset <= range_object.end_offset):
          merge_index = range_object_index
          break

        # Merge the range since it overlaps the existing one.
        if (range_offset <= range_object.start_offset and
            range_end_offset >= range_object.end_offset):
          merge_index = range_object_index
          break

        range_object_index += 1

      # Insert the range after the last one; the scan ran off the end
      # without finding an insert or merge position.
      if range_object_index >= number_of_range_objects:
        insert_index = number_of_range_objects

    if insert_index is not None and merge_index is not None:
      raise RuntimeError(
          u'Unable to insert the range both insert and merge specified.')

    if insert_index is not None:
      self.ranges.insert(insert_index, Range(range_offset, range_size))

    elif merge_index is not None:
      range_object = self.ranges[merge_index]
      # Extend the existing range at the start when the new range starts
      # before it.
      if range_offset < range_object.start_offset:
        range_object.size += range_object.start_offset - range_offset
        range_object.start_offset = range_offset
      # Extend the existing range at the end when the new range ends
      # after it.
      if range_end_offset > range_object.end_offset:
        range_object.size += range_end_offset - range_object.end_offset
        range_object.end_offset = range_end_offset
|
||||
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for the range list."""
|
||||
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import range_list
|
||||
|
||||
|
||||
class RangeListTest(unittest.TestCase):
  """Class to test the range list."""

  def testInsertPositiveRanges(self):
    """Function to test the insert function using positive ranges.

    Inserts non-overlapping, overlapping and adjacent ranges and checks
    that the range list merges them as expected. Uses assertEqual since
    the assertEquals alias is deprecated (and removed in Python 3.12).
    """
    range_list_object = range_list.RangeList()

    # Test non-overlapping range.
    range_list_object.Insert(500, 100)
    self.assertEqual(range_list_object.number_of_ranges, 1)

    range_object = range_list_object.ranges[0]
    self.assertEqual(range_object.start_offset, 500)
    self.assertEqual(range_object.end_offset, 600)
    self.assertEqual(range_object.size, 100)

    # Test non-overlapping range.
    range_list_object.Insert(2000, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 2000)
    self.assertEqual(range_object.end_offset, 2100)
    self.assertEqual(range_object.size, 100)

    # Test range that overlaps with an existing range at the start.
    range_list_object.Insert(1950, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1950)
    self.assertEqual(range_object.end_offset, 2100)
    self.assertEqual(range_object.size, 150)

    # Test range that overlaps with an existing range at the end.
    range_list_object.Insert(2050, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1950)
    self.assertEqual(range_object.end_offset, 2150)
    self.assertEqual(range_object.size, 200)

    # Test non-overlapping range.
    range_list_object.Insert(1000, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1000)
    self.assertEqual(range_object.end_offset, 1100)
    self.assertEqual(range_object.size, 100)

    # Test range that aligns with an existing range at the end.
    range_list_object.Insert(1100, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1000)
    self.assertEqual(range_object.end_offset, 1200)
    self.assertEqual(range_object.size, 200)

    # Test range that aligns with an existing range at the start.
    range_list_object.Insert(900, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 900)
    self.assertEqual(range_object.end_offset, 1200)
    self.assertEqual(range_object.size, 300)

    # Test non-overlapping range.
    range_list_object.Insert(0, 100)
    self.assertEqual(range_list_object.number_of_ranges, 4)

    range_object = range_list_object.ranges[0]
    self.assertEqual(range_object.start_offset, 0)
    self.assertEqual(range_object.end_offset, 100)
    self.assertEqual(range_object.size, 100)

    # Test invalid ranges.
    with self.assertRaises(ValueError):
      range_list_object.Insert(-1, 100)

    with self.assertRaises(ValueError):
      range_list_object.Insert(3000, -100)
|
||||
|
||||
|
||||
# Run the module's tests when invoked directly.
if __name__ == '__main__':
  unittest.main()
|
||||
@@ -0,0 +1,744 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""The scan tree classes used by the scan tree-based format scanner."""
|
||||
|
||||
import logging
|
||||
|
||||
from plaso.classifier import patterns
|
||||
from plaso.classifier import range_list
|
||||
|
||||
|
||||
class _PatternWeights(object):
|
||||
"""Class that implements pattern weights."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initializes the pattern weights."""
|
||||
super(_PatternWeights, self).__init__()
|
||||
self._offsets_per_weight = {}
|
||||
self._weight_per_offset = {}
|
||||
|
||||
def AddOffset(self, pattern_offset):
|
||||
"""Adds a pattern offset and sets its weight to 0.
|
||||
|
||||
Args:
|
||||
pattern_offset: the pattern offset to add to the pattern weights.
|
||||
|
||||
Raises:
|
||||
ValueError: if the pattern weights already contains the pattern offset.
|
||||
"""
|
||||
if pattern_offset in self._weight_per_offset:
|
||||
raise ValueError(u'Pattern offset already set.')
|
||||
|
||||
self._weight_per_offset[pattern_offset] = 0
|
||||
|
||||
def AddWeight(self, pattern_offset, weight):
|
||||
"""Adds a weight for a specific pattern offset.
|
||||
|
||||
Args:
|
||||
pattern_offset: the pattern offset to add to the pattern weights.
|
||||
weight: the corresponding weight to add.
|
||||
|
||||
Raises:
|
||||
ValueError: if the pattern weights does not contain the pattern offset.
|
||||
"""
|
||||
if pattern_offset not in self._weight_per_offset:
|
||||
raise ValueError(u'Pattern offset not set.')
|
||||
|
||||
self._weight_per_offset[pattern_offset] += weight
|
||||
|
||||
if weight not in self._offsets_per_weight:
|
||||
self._offsets_per_weight[weight] = []
|
||||
|
||||
self._offsets_per_weight[weight].append(pattern_offset)
|
||||
|
||||
def GetLargestWeight(self):
|
||||
"""Retrieves the largest weight or 0 if none."""
|
||||
if self._offsets_per_weight:
|
||||
return max(self._offsets_per_weight)
|
||||
|
||||
return 0
|
||||
|
||||
def GetOffsetsForWeight(self, weight):
|
||||
"""Retrieves the list of offsets for a specific weight."""
|
||||
return self._offsets_per_weight[weight]
|
||||
|
||||
def GetWeightForOffset(self, pattern_offset):
|
||||
"""Retrieves the weight for a specific pattern offset."""
|
||||
return self._weight_per_offset[pattern_offset]
|
||||
|
||||
def ToDebugString(self):
|
||||
"""Converts the pattern weights into a debug string."""
|
||||
header1 = u'Pattern offset\tWeight\n'
|
||||
|
||||
entries1 = u''.join([u'{0:d}\t{1:d}\n'.format(
|
||||
pattern_offset, self._weight_per_offset[pattern_offset])
|
||||
for pattern_offset in self._weight_per_offset])
|
||||
|
||||
header2 = u'Weight\tPattern offset(s)\n'
|
||||
|
||||
entries2 = u''.join([u'{0:d}\t{1!s}\n'.format(
|
||||
weight, self._offsets_per_weight[weight])
|
||||
for weight in self._offsets_per_weight])
|
||||
|
||||
return u''.join([header1, entries1, u'\n', header2, entries2, u'\n'])
|
||||
|
||||
def SetWeight(self, pattern_offset, weight):
|
||||
"""Sets a weight for a specific pattern offset.
|
||||
|
||||
Args:
|
||||
pattern_offset: the pattern offset to set in the pattern weights.
|
||||
weight: the corresponding weight to set.
|
||||
|
||||
Raises:
|
||||
ValueError: if the pattern weights does not contain the pattern offset.
|
||||
"""
|
||||
if pattern_offset not in self._weight_per_offset:
|
||||
raise ValueError(u'Pattern offset not set.')
|
||||
|
||||
self._weight_per_offset[pattern_offset] = weight
|
||||
|
||||
if weight not in self._offsets_per_weight:
|
||||
self._offsets_per_weight[weight] = []
|
||||
|
||||
self._offsets_per_weight[weight].append(pattern_offset)
|
||||
|
||||
|
||||
class ScanTree(object):
  """Class that implements a scan tree.

  The scan tree is built from the signatures in a specification store and
  drives the scan tree-based format scanner.
  """

  # Byte values that occur frequently in typical file data; matching on
  # these provides little discriminating value when choosing tree nodes.
  _COMMON_BYTE_VALUES = frozenset(
      '\x00\x01\xff\t\n\r 0123456789'
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
      'abcdefghijklmnopqrstuvwxyz')

  # The offset must be positive, negative offsets are ignored.
  OFFSET_MODE_POSITIVE = 1
  # The offset must be negative, positive offsets are ignored.
  OFFSET_MODE_NEGATIVE = 2
  # The offset must be positive, an error is raised for negative offsets.
  OFFSET_MODE_POSITIVE_STRICT = 3
  # The offset must be negative, an error is raised for positive offsets.
  OFFSET_MODE_NEGATIVE_STRICT = 4

  def __init__(
      self, specification_store, is_bound,
      offset_mode=OFFSET_MODE_POSITIVE_STRICT):
    """Initializes and builds the scan tree.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.
      offset_mode: optional value to indicate how the signature offsets should
                   be handled. The default is that the offset must be positive
                   and an error is raised for negative offsets.
    """
    super(ScanTree, self).__init__()
    self.largest_length = 0
    self.pattern_list = []
    self.range_list = range_list.RangeList()
    self.root_node = None
    self.skip_table = None

    # First determine all the patterns from the specification store.
    self._BuildPatterns(specification_store, is_bound, offset_mode=offset_mode)

    # Next create the scan tree starting with the root node.
    ignore_list = []
    pattern_table = patterns.PatternTable(
        self.pattern_list, ignore_list, is_bound)

    if pattern_table.patterns:
      self.root_node = self._BuildScanTreeNode(
          pattern_table, ignore_list, is_bound)

      logging.debug(u'Scan tree:\n{0:s}'.format(
          self.root_node.ToDebugString()))

    # At the end the skip table is determined to provide for the
    # Boyer–Moore–Horspool skip value.
    self.skip_table = pattern_table.GetSkipTable()

    logging.debug(u'Skip table:\n{0:s}'.format(
        self.skip_table.ToDebugString()))

    self.largest_length = pattern_table.largest_pattern_length

  def _BuildPatterns(
      self, specification_store, is_bound,
      offset_mode=OFFSET_MODE_POSITIVE_STRICT):
    """Builds the list of patterns.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.
      offset_mode: optional value to indicate how the signature offsets should
                   be handled. The default is that the offset must be positive
                   and an error is raised for negative offsets.

    Raises:
      ValueError: if a signature offset invalid according to specified offset
                  mode or a signature pattern is too small to be useful (< 4).
    """
    self.pattern_list = []

    for specification in specification_store.specifications:
      signature_index = 0

      for signature in specification.signatures:
        if signature.expression:
          signature_offset = signature.offset if is_bound else 0
          signature_pattern_length = len(signature.expression)

          # Make sure signature offset is numeric.
          try:
            signature_offset = int(signature_offset)
          except (TypeError, ValueError):
            signature_offset = 0

          if signature_offset < 0:
            if offset_mode == self.OFFSET_MODE_POSITIVE:
              continue
            elif offset_mode == self.OFFSET_MODE_POSITIVE_STRICT:
              raise ValueError(u'Signature offset less than 0.')

            # The range list does not allow offsets to be negative and thus
            # the signature offset is turned into a positive equivalent.
            signature_offset *= -1

            # The signature size is subtracted to make sure the spanning
            # range will align with the original negative offset values.
            signature_offset -= signature_pattern_length

          elif signature_offset > 0:
            if offset_mode == self.OFFSET_MODE_NEGATIVE:
              continue
            elif offset_mode == self.OFFSET_MODE_NEGATIVE_STRICT:
              raise ValueError(u'Signature offset greater than 0.')

          if signature_pattern_length < 4:
            raise ValueError(u'Signature pattern smaller than 4.')

          pattern = patterns.Pattern(
              signature_index, signature, specification)
          self.pattern_list.append(pattern)
          self.range_list.Insert(signature_offset, signature_pattern_length)

          signature_index += 1

  def _BuildScanTreeNode(self, pattern_table, ignore_list, is_bound):
    """Builds a scan tree node.

    Args:
      pattern_table: a pattern table (instance of PatternTable).
      ignore_list: a list of pattern offsets to ignore
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.

    Raises:
      ValueError: if number of byte value patterns value out of bounds.

    Returns:
      A scan tree node (instance of ScanTreeNode).
    """
    # Make a copy of the lists because the function is going to alter them
    # and the changes must remain in scope of the function.
    pattern_list = list(pattern_table.patterns)
    ignore_list = list(ignore_list)

    similarity_weights = _PatternWeights()
    occurrence_weights = _PatternWeights()
    value_weights = _PatternWeights()

    for pattern_offset in pattern_table.offsets:
      similarity_weights.AddOffset(pattern_offset)
      occurrence_weights.AddOffset(pattern_offset)
      value_weights.AddOffset(pattern_offset)

      byte_values = pattern_table.GetByteValues(pattern_offset)
      number_of_byte_values = len(byte_values)

      if number_of_byte_values > 1:
        occurrence_weights.SetWeight(pattern_offset, number_of_byte_values)

      for byte_value in byte_values:
        byte_value_patterns = byte_values[byte_value]
        byte_value_weight = len(byte_value_patterns.patterns)

        if byte_value_weight > 1:
          similarity_weights.AddWeight(pattern_offset, byte_value_weight)

        # BUG FIX: the original tested byte_value_weight (an integer count)
        # against _COMMON_BYTE_VALUES (a set of byte characters), which is
        # never true for an int, so every byte value raised the value weight.
        # The intent is to favor offsets with uncommon byte values.
        if byte_value not in self._COMMON_BYTE_VALUES:
          value_weights.AddWeight(pattern_offset, 1)

    logging.debug(u'Pattern table:\n{0:s}'.format(
        pattern_table.ToDebugString()))
    logging.debug(u'Similarity weights:\n{0:s}'.format(
        similarity_weights.ToDebugString()))
    logging.debug(u'Occurrence weights:\n{0:s}'.format(
        occurrence_weights.ToDebugString()))
    logging.debug(u'Value weights:\n{0:s}'.format(
        value_weights.ToDebugString()))

    pattern_offset = self._GetMostSignificantPatternOffset(
        pattern_list, similarity_weights, occurrence_weights, value_weights)

    ignore_list.append(pattern_offset)

    # For the scan tree negative offsets are adjusted so that
    # the smallest pattern offset is 0.
    scan_tree_pattern_offset = pattern_offset
    if scan_tree_pattern_offset < 0:
      scan_tree_pattern_offset -= pattern_table.smallest_pattern_offset

    scan_tree_node = ScanTreeNode(scan_tree_pattern_offset)

    byte_values = pattern_table.GetByteValues(pattern_offset)

    for byte_value in byte_values:
      byte_value_patterns = byte_values[byte_value]

      logging.debug(u'{0:s}'.format(byte_value_patterns.ToDebugString()))

      number_of_byte_value_patterns = len(byte_value_patterns.patterns)

      if number_of_byte_value_patterns <= 0:
        raise ValueError(
            u'Invalid number of byte value patterns value out of bounds.')

      elif number_of_byte_value_patterns == 1:
        # A single pattern for this byte value: store the pattern directly.
        for identifier in byte_value_patterns.patterns:
          logging.debug(
              u'Adding pattern: {0:s} for byte value: 0x{1:02x}.'.format(
                  identifier, ord(byte_value)))

          scan_tree_node.AddByteValue(
              byte_value, byte_value_patterns.patterns[identifier])

      else:
        # Multiple patterns for this byte value: recurse into a sub node.
        pattern_table = patterns.PatternTable(
            byte_value_patterns.patterns.itervalues(), ignore_list, is_bound)

        scan_sub_node = self._BuildScanTreeNode(
            pattern_table, ignore_list, is_bound)

        logging.debug(
            u'Adding scan node for byte value: 0x{0:02x}\n{1:s}'.format(
                ord(byte_value), scan_sub_node.ToDebugString()))

        scan_tree_node.AddByteValue(ord(byte_value), scan_sub_node)

      # Patterns handled by this byte value no longer need a default value.
      for identifier in byte_value_patterns.patterns:
        logging.debug(u'Removing pattern: {0:s} from:\n{1:s}'.format(
            identifier, self._PatternsToDebugString(pattern_list)))

        pattern_list.remove(byte_value_patterns.patterns[identifier])

    logging.debug(u'Remaining patterns:\n{0:s}'.format(
        self._PatternsToDebugString(pattern_list)))

    number_of_patterns = len(pattern_list)

    if number_of_patterns == 1:
      logging.debug(u'Setting pattern: {0:s} for default value'.format(
          pattern_list[0].identifier))

      scan_tree_node.SetDefaultValue(pattern_list[0])

    elif number_of_patterns > 1:
      pattern_table = patterns.PatternTable(pattern_list, ignore_list, is_bound)

      scan_sub_node = self._BuildScanTreeNode(
          pattern_table, ignore_list, is_bound)

      logging.debug(u'Setting scan node for default value:\n{0:s}'.format(
          scan_sub_node.ToDebugString()))

      scan_tree_node.SetDefaultValue(scan_sub_node)

    return scan_tree_node

  def _GetMostSignificantPatternOffset(
      self, pattern_list, similarity_weights, occurrence_weights,
      value_weights):
    """Returns the most significant pattern offset.

    Args:
      pattern_list: a list of patterns
      similarity_weights: the similarity (pattern) weights.
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Raises:
      ValueError: when pattern is an empty list.

    Returns:
      a pattern offset.
    """
    if not pattern_list:
      raise ValueError(u'Missing pattern list.')

    pattern_offset = None
    number_of_patterns = len(pattern_list)

    if number_of_patterns == 1:
      pattern_offset = self._GetPatternOffsetForValueWeights(
          value_weights)

    elif number_of_patterns == 2:
      pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
          occurrence_weights, value_weights)

    elif number_of_patterns > 2:
      pattern_offset = self._GetPatternOffsetForSimilarityWeights(
          similarity_weights, occurrence_weights, value_weights)

    logging.debug(u'Largest weight offset: {0:d}'.format(pattern_offset))

    return pattern_offset

  def _GetPatternOffsetForOccurrenceWeights(
      self, occurrence_weights, value_weights):
    """Returns the most significant pattern offset based on the value weights.

    Args:
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Returns:
      a pattern offset.
    """
    debug_string = ""
    pattern_offset = None

    largest_weight = occurrence_weights.GetLargestWeight()
    logging.debug(u'Largest occurrence weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      occurrence_weight_offsets = occurrence_weights.GetOffsetsForWeight(
          largest_weight)
      number_of_occurrence_offsets = len(occurrence_weight_offsets)
    else:
      number_of_occurrence_offsets = 0

    if number_of_occurrence_offsets == 0:
      pattern_offset = self._GetPatternOffsetForValueWeights(
          value_weights)

    elif number_of_occurrence_offsets == 1:
      pattern_offset = occurrence_weight_offsets[0]

    else:
      largest_weight = 0
      largest_value_weight = 0

      for occurrence_offset in occurrence_weight_offsets:
        value_weight = value_weights.GetWeightForOffset(
            occurrence_offset)

        debug_string = (
            u'Occurrence offset: {0:d} value weight: {1:d}').format(
                occurrence_offset, value_weight)

        # BUG FIX: an offset of 0 is a valid selection; the original used
        # "not pattern_offset" which treated offset 0 as unset and kept
        # overwriting the selection on every iteration.
        if pattern_offset is None or largest_weight < value_weight:
          largest_weight = value_weight
          pattern_offset = occurrence_offset

          debug_string += u' largest value weight: {0:d}'.format(
              largest_value_weight)

        logging.debug(u'{0:s}'.format(debug_string))

    return pattern_offset

  def _GetPatternOffsetForSimilarityWeights(
      self, similarity_weights, occurrence_weights, value_weights):
    """Returns the most significant pattern offset.

    Args:
      similarity_weights: the similarity (pattern) weights.
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Returns:
      a pattern offset.
    """
    debug_string = ""
    pattern_offset = None

    largest_weight = similarity_weights.GetLargestWeight()
    logging.debug(u'Largest similarity weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      similarity_weight_offsets = similarity_weights.GetOffsetsForWeight(
          largest_weight)
      number_of_similarity_offsets = len(similarity_weight_offsets)
    else:
      number_of_similarity_offsets = 0

    if number_of_similarity_offsets == 0:
      pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
          occurrence_weights, value_weights)

    elif number_of_similarity_offsets == 1:
      pattern_offset = similarity_weight_offsets[0]

    else:
      largest_weight = 0
      largest_value_weight = 0

      for similarity_offset in similarity_weight_offsets:
        occurrence_weight = occurrence_weights.GetWeightForOffset(
            similarity_offset)

        debug_string = (
            u'Similarity offset: {0:d} occurrence weight: {1:d}').format(
                similarity_offset, occurrence_weight)

        # A tie on occurrence weight is broken by the value weight; reset
        # largest_weight to force re-selection when the value weight wins.
        if largest_weight > 0 and largest_weight == occurrence_weight:
          value_weight = value_weights.GetWeightForOffset(
              similarity_offset)

          debug_string += u' value weight: {0:d}'.format(value_weight)

          if largest_value_weight < value_weight:
            largest_weight = 0

        # BUG FIX: an offset of 0 is a valid selection; the original used
        # "not pattern_offset" which treated offset 0 as unset.
        if pattern_offset is None or largest_weight < occurrence_weight:
          largest_weight = occurrence_weight
          pattern_offset = similarity_offset

          largest_value_weight = value_weights.GetWeightForOffset(
              similarity_offset)

          debug_string += u' largest value weight: {0:d}'.format(
              largest_value_weight)

        logging.debug(u'{0:s}'.format(debug_string))

    return pattern_offset

  def _GetPatternOffsetForValueWeights(
      self, value_weights):
    """Returns the most significant pattern offset based on the value weights.

    Args:
      value_weights: the value (pattern) weights.

    Raises:
      RuntimeError: no value weight offset were found.

    Returns:
      a pattern offset.
    """
    largest_weight = value_weights.GetLargestWeight()
    logging.debug(u'Largest value weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      value_weight_offsets = value_weights.GetOffsetsForWeight(largest_weight)
      number_of_value_offsets = len(value_weight_offsets)
    else:
      number_of_value_offsets = 0

    if number_of_value_offsets == 0:
      raise RuntimeError(u'No value weight offsets found.')

    return value_weight_offsets[0]

  def _PatternsToDebugString(self, pattern_list):
    """Converts the list of patterns into a debug string."""
    entries = u', '.join([u'{0:s}'.format(pattern) for pattern in pattern_list])

    return u''.join([u'[', entries, u']'])
|
||||
|
||||
|
||||
class ScanTreeNode(object):
  """Class that implements a scan tree node."""

  def __init__(self, pattern_offset):
    """Initializes the scan tree node.

    Args:
      pattern_offset: the offset in the pattern to which the node
                      applies.
    """
    super(ScanTreeNode, self).__init__()
    # Maps an integer byte value to a scan object, which is either a
    # sub node (ScanTreeNode) or a pattern.
    self._byte_values = {}
    # Scan object to fall back on when no byte value matches.
    self.default_value = None
    # Set when this node is added to another node as a sub node.
    self.parent = None
    self.pattern_offset = pattern_offset

  def AddByteValue(self, byte_value, scan_object):
    """Adds a byte value.

    Args:
      byte_value: the corresponding byte value, either an integer or a
                  single character string.
      scan_object: the scan object, either a scan sub node or a pattern.

    Raises:
      ValueError: if byte value is out of bounds or if the node already
                  contains a scan object for the byte value.
    """
    # Normalize single character strings to their integer byte value.
    key = ord(byte_value) if isinstance(byte_value, str) else byte_value

    if key < 0 or key > 255:
      raise ValueError(u'Invalid byte value, value out of bounds.')

    if key in self._byte_values:
      raise ValueError(u'Byte value already set.')

    if isinstance(scan_object, ScanTreeNode):
      scan_object.parent = self

    self._byte_values[key] = scan_object

  def CompareByteValue(
      self, data, data_offset, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the bounded scan tree.

    This function will return partial matches on the data block
    boundary as long as the total data size has not been reached.

    Args:
      data: a buffer containing raw data.
      data_offset: the offset in the raw data in the buffer.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Returns:
      the resulting scan object which is either a ScanTreeNode or Pattern
      or None.

    Raises:
      RuntimeError: if the data offset, total data offset, total data size
                    or pattern offset value is out of bounds.
    """
    if data_offset < 0 or data_offset >= data_size:
      raise RuntimeError(u'Invalid data offset, value out of bounds.')

    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size, value out of bounds.')

    if total_data_offset < 0 or (
        total_data_size is not None and total_data_offset >= total_data_size):
      raise RuntimeError(u'Invalid total data offset, value out of bounds.')

    # Allow partial matches on the buffer boundary when the buffer reaches
    # the end of the total data.
    match_on_boundary = (
        total_data_size is not None and
        total_data_offset + data_size >= total_data_size)

    data_offset += self.pattern_offset

    if not match_on_boundary and data_offset >= data_size:
      raise RuntimeError(u'Invalid pattern offset value, out of bounds.')

    found_match = False
    scan_object = None

    if data_offset < data_size:
      data_byte_value = ord(data[data_offset])
      if data_byte_value in self._byte_values:
        found_match = True
        scan_object = self._byte_values[data_byte_value]

    if found_match:
      logging.debug(
          u'Scan tree node match at data offset: 0x{0:08x}.'.format(data_offset)
      )
    else:
      # No byte value match: use this node's default value, or walk up the
      # parent chain to the nearest ancestor that has one.
      scan_object = self.default_value

      if not scan_object:
        ancestor = self.parent
        while ancestor and not ancestor.default_value:
          ancestor = ancestor.parent

        scan_object = ancestor.default_value if ancestor else None

    return scan_object

  def SetDefaultValue(self, scan_object):
    """Sets the default (non-match) value.

    Args:
      scan_object: the scan object, either a scan sub node or a pattern.

    Raises:
      ValueError: if the default value is already set.
    """
    if self.default_value:
      raise ValueError(u'Default value already set.')

    self.default_value = scan_object

  def ToDebugString(self, indentation_level=1):
    """Converts the scan tree node into a debug string."""
    indentation = u' ' * indentation_level

    text_parts = [u'{0:s}pattern offset: {1:d}\n'.format(
        indentation, self.pattern_offset)]

    for byte_value in self._byte_values:
      scan_object = self._byte_values[byte_value]

      text_parts.append(u'{0:s}byte value: 0x{1:02x}\n'.format(
          indentation, byte_value))

      if isinstance(scan_object, ScanTreeNode):
        text_parts.append(u'{0:s}scan tree node:\n'.format(indentation))
        text_parts.append(scan_object.ToDebugString(indentation_level + 1))

      elif isinstance(scan_object, patterns.Pattern):
        text_parts.append(u'{0:s}pattern: {1:s}\n'.format(
            indentation, scan_object.identifier))

    text_parts.append(u'{0:s}default value:\n'.format(indentation))

    if isinstance(self.default_value, ScanTreeNode):
      text_parts.append(u'{0:s}scan tree node:\n'.format(indentation))
      text_parts.append(self.default_value.ToDebugString(indentation_level + 1))

    elif isinstance(self.default_value, patterns.Pattern):
      text_parts.append(u'{0:s}pattern: {1:s}\n'.format(
          indentation, self.default_value.identifier))

    text_parts.append(u'\n')
    return u''.join(text_parts)
|
||||
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains tests for the scan tree classes."""
|
||||
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import patterns
|
||||
from plaso.classifier import scan_tree
|
||||
from plaso.classifier import specification
|
||||
|
||||
|
||||
class ScanTreeNodeTest(unittest.TestCase):
  """Class to test the scan tree node."""

  def testAddByteValueWithPattern(self):
    """Function to test the add byte value with pattern function."""
    scan_tree_node = scan_tree.ScanTreeNode(0)

    regf_specification = specification.Specification('REGF')
    regf_specification.AddNewSignature('regf', offset=0)
    regf_signature = specification.Signature('regf', offset=0)
    regf_pattern = patterns.Pattern(0, regf_signature, regf_specification)

    esedb_specification = specification.Specification('ESEDB')
    esedb_specification.AddNewSignature('\xef\xcd\xab\x89', offset=4)
    esedb_signature = specification.Signature('\xef\xcd\xab\x89', offset=4)
    esedb_pattern = patterns.Pattern(0, esedb_signature, esedb_specification)

    scan_tree_node.AddByteValue('r', regf_pattern)
    scan_tree_node.AddByteValue('\xef', esedb_pattern)

    # Adding the same byte value twice or a value outside 0-255 should fail.
    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue('r', regf_pattern)

    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(-1, regf_pattern)

    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(256, regf_pattern)

  def testAddByteValueWithScanNode(self):
    """Function to test the add byte value with scan node function."""
    scan_tree_node = scan_tree.ScanTreeNode(0)
    scan_sub_node_0x41 = scan_tree.ScanTreeNode(1)
    scan_sub_node_0x80 = scan_tree.ScanTreeNode(1)

    scan_tree_node.AddByteValue(0x41, scan_sub_node_0x41)
    scan_tree_node.AddByteValue(0x80, scan_sub_node_0x80)

    # Adding the same byte value twice or a value outside 0-255 should fail.
    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(0x80, scan_sub_node_0x80)

    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(-1, scan_sub_node_0x80)

    with self.assertRaises(ValueError):
      scan_tree_node.AddByteValue(256, scan_sub_node_0x80)
|
||||
|
||||
|
||||
# Run the scan tree node tests when this module is invoked directly.
if __name__ == '__main__':
  unittest.main()
|
||||
@@ -0,0 +1,749 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains the classes for a scan tree-based format scanner."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from plaso.classifier import patterns
|
||||
from plaso.classifier import range_list
|
||||
from plaso.classifier import scan_tree
|
||||
|
||||
|
||||
class _ScanMatch(object):
  """Class that implements a scan match.

  A scan match records where in the total data a pattern was found.
  """

  def __init__(self, total_data_offset, pattern):
    """Initializes the scan match.

    Args:
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.
      pattern: the pattern (instance of Pattern) that was matched.
    """
    super(_ScanMatch, self).__init__()
    self.pattern = pattern
    self.total_data_offset = total_data_offset

  @property
  def specification(self):
    """The format specification of the matched pattern."""
    return self.pattern.specification
|
||||
|
||||
|
||||
class _ScanResult(object):
  """Class that implements a scan result.

  A scan result groups all scan matches that belong to the same format
  specification.
  """

  def __init__(self, specification):
    """Initializes the scan result.

    Args:
      specification: the format specification (instance of Specification)
                     the scan matches apply to.
    """
    super(_ScanResult, self).__init__()
    self.specification = specification
    self.scan_matches = []

  @property
  def identifier(self):
    """The specification identifier."""
    return self.specification.identifier
|
||||
|
||||
|
||||
class ScanState(object):
  """Class that implements a scan state.

  The state progresses from start, through scanning, to stop. Matches can
  only be added while active (start or scanning); results can only be
  retrieved once stopped.
  """

  # The state definitions.
  _SCAN_STATE_START = 1
  _SCAN_STATE_SCANNING = 2
  _SCAN_STATE_STOP = 3

  def __init__(self, scan_tree_node, total_data_size=None):
    """Initializes the scan state.

    Args:
      scan_tree_node: the corresponding scan tree node or None.
      total_data_size: optional value to indicate the total data size.
                       The default is None.
    """
    super(ScanState, self).__init__()
    self._matches = []
    self.remaining_data = None
    self.remaining_data_size = 0
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_START
    self.total_data_offset = 0
    self.total_data_size = total_data_size

  def _RaiseIfNotActive(self):
    """Checks that the state is start or scanning.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    if self.state not in (
        self._SCAN_STATE_START, self._SCAN_STATE_SCANNING):
      raise RuntimeError(u'Unsupported scan state.')

  def AddMatch(self, total_data_offset, pattern):
    """Adds a match to an active scan state.

    Args:
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.
      pattern: the pattern matched.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self._matches.append(_ScanMatch(total_data_offset, pattern))

  def GetMatches(self):
    """Retrieves the matches collected during the scan.

    Returns:
      A list of scan matches (instances of _ScanMatch).

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    # Matches may only be read once scanning has stopped.
    if self.state != self._SCAN_STATE_STOP:
      raise RuntimeError(u'Unsupported scan state.')
    return self._matches

  def Reset(self, scan_tree_node):
    """Resets a stopped state back to start and clears the remaining data.

    Args:
      scan_tree_node: the corresponding scan tree node or None.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    # Only a stopped scan can be restarted.
    if self.state != self._SCAN_STATE_STOP:
      raise RuntimeError(u'Unsupported scan state.')

    self.remaining_data = None
    self.remaining_data_size = 0
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_START

  def Scanning(self, scan_tree_node, total_data_offset):
    """Sets an active state to scanning.

    Args:
      scan_tree_node: the active scan tree node.
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_SCANNING
    self.total_data_offset = total_data_offset

  def Stop(self):
    """Sets an active state to stop.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self.scan_tree_node = None
    self.state = self._SCAN_STATE_STOP
|
||||
|
||||
|
||||
class ScanTreeScannerBase(object):
  """Class that implements a scan tree-based scanner base.

  Subclasses feed consecutive data buffers to _ScanBufferScanState() and
  finish a scan with _ScanBufferScanStateFinal().
  """

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(ScanTreeScannerBase, self).__init__()
    # The scan tree is built lazily by the subclass, e.g. in StartScan().
    self._scan_tree = None
    self._specification_store = specification_store

  def _ScanBufferScanState(
      self, scan_tree_object, scan_state, data, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the scan tree.

    This function implements a Boyer-Moore-Horspool equivalent approach
    in combination with the scan tree: the scan tree matches candidate
    patterns at the current offset and the skip table advances the offset
    when no pattern matched.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Raises:
      RuntimeError: if the total data offset, total data size or the last
                    pattern offset value is out of bounds.
    """
    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size, value out of bounds.')

    if total_data_offset < 0 or (
        total_data_size is not None and total_data_offset >= total_data_size):
      raise RuntimeError(u'Invalid total data offset, value out of bounds.')

    data_offset = 0
    scan_tree_node = scan_state.scan_tree_node

    if scan_state.remaining_data:
      # Prepend the data that was carried over from the previous buffer so
      # signatures spanning the buffer boundary can still match.
      # str.join() should be more efficient than concatenation by +.
      data = ''.join([scan_state.remaining_data, data])
      data_size += scan_state.remaining_data_size

      scan_state.remaining_data = None
      scan_state.remaining_data_size = 0

    # match_on_boundary indicates this buffer reaches the end of the total
    # data, so matches near the end may no longer be deferred.
    if (total_data_size is not None and
        total_data_offset + data_size >= total_data_size):
      match_on_boundary = True
    else:
      match_on_boundary = False

    while data_offset < data_size:
      if (not match_on_boundary and
          data_offset + scan_tree_object.largest_length >= data_size):
        # Not enough data left for the largest pattern; keep the tail for
        # the next buffer (stored in remaining_data below).
        break

      found_match = False
      scan_done = False

      # Walk down the scan tree until a pattern or a leaf is reached.
      while not scan_done:
        scan_object = scan_tree_node.CompareByteValue(
            data, data_offset, data_size, total_data_offset,
            total_data_size=total_data_size)

        if isinstance(scan_object, scan_tree.ScanTreeNode):
          scan_tree_node = scan_object
        else:
          scan_done = True

      if isinstance(scan_object, patterns.Pattern):
        pattern_length = len(scan_object.signature.expression)
        data_last_offset = data_offset + pattern_length

        # NOTE: cmp() is Python 2 only; compares the full signature
        # expression against the candidate data.
        if cmp(scan_object.signature.expression,
               data[data_offset:data_last_offset]) == 0:

          # A bound signature only matches at its declared offset.
          if (not scan_object.signature.is_bound or
              scan_object.signature.offset == data_offset):
            found_match = True

            logging.debug(
                u'Signature match at data offset: 0x{0:08x}.'.format(
                    data_offset))

            scan_state.AddMatch(total_data_offset + data_offset, scan_object)

      if found_match:
        # Skip past the matched signature and restart at the tree root.
        skip_value = len(scan_object.signature.expression)
        scan_tree_node = scan_tree_object.root_node
      else:
        # No match: determine the skip value from the Horspool-style skip
        # table, scanning backwards from the last pattern position.
        last_pattern_offset = (
            scan_tree_object.skip_table.skip_pattern_length - 1)

        if data_offset + last_pattern_offset >= data_size:
          raise RuntimeError(
              u'Invalid last pattern offset, value out of bounds.')
        skip_value = 0

        while last_pattern_offset >= 0 and not skip_value:
          last_data_offset = data_offset + last_pattern_offset
          byte_value = ord(data[last_data_offset])
          skip_value = scan_tree_object.skip_table[byte_value]
          last_pattern_offset -= 1

        if not skip_value:
          # Always advance at least one byte to guarantee progress.
          skip_value = 1

        scan_tree_node = scan_tree_object.root_node

      data_offset += skip_value

    if not match_on_boundary and data_offset < data_size:
      # Stash the unscanned tail so the next buffer can prepend it.
      scan_state.remaining_data = data[data_offset:data_size]
      scan_state.remaining_data_size = data_size - data_offset

    scan_state.Scanning(scan_tree_node, total_data_offset + data_offset)

  def _ScanBufferScanStateFinal(self, scan_tree_object, scan_state):
    """Scans the remaining data in the scan state using the scan tree.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
    """
    if scan_state.remaining_data:
      data = scan_state.remaining_data
      data_size = scan_state.remaining_data_size

      scan_state.remaining_data = None
      scan_state.remaining_data_size = 0

      # Setting the total data size ensures boundary matches are returned
      # in this final scanning pass.
      total_data_size = scan_state.total_data_size
      if total_data_size is None:
        total_data_size = scan_state.total_data_offset + data_size

      self._ScanBufferScanState(
          scan_tree_object, scan_state, data, data_size,
          scan_state.total_data_offset, total_data_size=total_data_size)

    scan_state.Stop()

  def GetScanResults(self, scan_state):
    """Retrieves the scan results, grouped by specification identifier.

    Args:
      scan_state: the scan state (instance of ScanState).

    Returns:
      A list of scan results (instances of _ScanResult).
    """
    scan_results = {}

    for scan_match in scan_state.GetMatches():
      specification = scan_match.specification
      identifier = specification.identifier

      logging.debug(
          u'Scan match at offset: 0x{0:08x} specification: {1:s}'.format(
              scan_match.total_data_offset, identifier))

      if identifier not in scan_results:
        scan_results[identifier] = _ScanResult(specification)

      scan_results[identifier].scan_matches.append(scan_match)

    return scan_results.values()
|
||||
|
||||
|
||||
class Scanner(ScanTreeScannerBase):
  """Class that implements a scan tree-based scanner.

  The scanner matches signatures at any offset in the scanned data.
  """

  _READ_BUFFER_SIZE = 512

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(Scanner, self).__init__(specification_store)

  def ScanBuffer(self, scan_state, data, data_size):
    """Scans a buffer.

    Args:
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
    """
    total_data_offset = scan_state.total_data_offset
    total_data_size = scan_state.total_data_size

    self._ScanBufferScanState(
        self._scan_tree, scan_state, data, data_size, total_data_offset,
        total_data_size=total_data_size)

  def ScanFileObject(self, file_object):
    """Scans a file-like object.

    Args:
      file_object: a file-like object.

    Returns:
      A list of scan results (instances of ScanResult).
    """
    # Prefer the object's own size accessor; otherwise derive the size by
    # seeking to the end.
    if hasattr(file_object, 'get_size'):
      file_size = file_object.get_size()
    else:
      file_object.seek(0, os.SEEK_END)
      file_size = file_object.tell()

    scan_state = self.StartScan(total_data_size=file_size)

    read_offset = 0
    file_object.seek(read_offset, os.SEEK_SET)

    while read_offset < file_size:
      buffer_data = file_object.read(self._READ_BUFFER_SIZE)
      if not buffer_data:
        break

      buffer_size = len(buffer_data)
      self._ScanBufferScanState(
          self._scan_tree, scan_state, buffer_data, buffer_size, read_offset,
          total_data_size=file_size)

      read_offset += buffer_size

    self.StopScan(scan_state)

    return self.GetScanResults(scan_state)

  def StartScan(self, total_data_size=None):
    """Starts a scan.

    The scan tree is built on the first call and reused afterwards.

    Args:
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Returns:
      A scan state (instance of ScanState).

    Raises:
      RuntimeError: when total data size is invalid.
    """
    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size.')

    if self._scan_tree is None:
      self._scan_tree = scan_tree.ScanTree(
          self._specification_store, None)

    return ScanState(
        self._scan_tree.root_node, total_data_size=total_data_size)

  def StopScan(self, scan_state):
    """Stops a scan and flushes any remaining buffered data.

    Args:
      scan_state: the scan state (instance of ScanState).
    """
    self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
|
||||
|
||||
|
||||
class OffsetBoundScanner(ScanTreeScannerBase):
  """Class that implements an offset-bound scan tree-based scanner.

  Uses two scan trees: one for header (start-of-data) signatures and one
  for footer (end-of-data) signatures.
  """

  _READ_BUFFER_SIZE = 512

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(OffsetBoundScanner, self).__init__(specification_store)
    # Header/footer trees and spanning ranges are built lazily in
    # StartScan().
    self._footer_scan_tree = None
    self._footer_spanning_range = None
    self._header_scan_tree = None
    self._header_spanning_range = None

  def _GetFooterRange(self, total_data_size):
    """Retrieves the read buffer aligned footer range.

    Args:
      total_data_size: the total data size.

    Returns:
      A range (instance of Range).
    """
    # The actual footer range is in reverse since the spanning footer range
    # is based on positive offsets, where 0 is the end of file.
    if self._footer_spanning_range.end_offset < total_data_size:
      footer_range_start_offset = (
          total_data_size - self._footer_spanning_range.end_offset)
    else:
      footer_range_start_offset = 0

    # Calculate the lower bound modulus of the footer range start offset
    # in increments of the read buffer size.
    # NOTE: relies on Python 2 integer (floor) division semantics of /=.
    footer_range_start_offset /= self._READ_BUFFER_SIZE
    footer_range_start_offset *= self._READ_BUFFER_SIZE

    # Calculate the upper bound modulus of the footer range size
    # in increments of the read buffer size.
    footer_range_size = self._footer_spanning_range.size
    remainder = footer_range_size % self._READ_BUFFER_SIZE
    footer_range_size /= self._READ_BUFFER_SIZE

    if remainder > 0:
      footer_range_size += 1

    footer_range_size *= self._READ_BUFFER_SIZE

    return range_list.Range(footer_range_start_offset, footer_range_size)

  def _GetHeaderRange(self):
    """Retrieves the read buffer aligned header range.

    Returns:
      A range (instance of Range).
    """
    # Calculate the lower bound modulus of the header range start offset
    # in increments of the read buffer size.
    # NOTE: relies on Python 2 integer (floor) division semantics of /=.
    header_range_start_offset = self._header_spanning_range.start_offset
    header_range_start_offset /= self._READ_BUFFER_SIZE
    header_range_start_offset *= self._READ_BUFFER_SIZE

    # Calculate the upper bound modulus of the header range size
    # in increments of the read buffer size.
    header_range_size = self._header_spanning_range.size
    remainder = header_range_size % self._READ_BUFFER_SIZE
    header_range_size /= self._READ_BUFFER_SIZE

    if remainder > 0:
      header_range_size += 1

    header_range_size *= self._READ_BUFFER_SIZE

    return range_list.Range(header_range_start_offset, header_range_size)

  def _ScanBufferScanState(
      self, scan_tree_object, scan_state, data, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the scan tree.

    Unlike the base class implementation this variant only probes the
    offset-bound patterns; it does not slide over the buffer.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.
    """
    scan_done = False
    scan_tree_node = scan_tree_object.root_node

    # Walk down the scan tree from the root until a pattern or a leaf is
    # reached.
    while not scan_done:
      data_offset = 0

      scan_object = scan_tree_node.CompareByteValue(
          data, data_offset, data_size, total_data_offset,
          total_data_size=total_data_size)

      if isinstance(scan_object, scan_tree.ScanTreeNode):
        scan_tree_node = scan_object
      else:
        scan_done = True

    if isinstance(scan_object, patterns.Pattern):
      pattern_length = len(scan_object.signature.expression)
      # The candidate is compared at the signature's own offset.
      pattern_start_offset = scan_object.signature.offset
      pattern_end_offset = pattern_start_offset + pattern_length

      # NOTE: cmp() is Python 2 only.
      if cmp(scan_object.signature.expression,
             data[pattern_start_offset:pattern_end_offset]) == 0:
        scan_state.AddMatch(
            total_data_offset + scan_object.signature.offset, scan_object)

        logging.debug(
            u'Signature match at data offset: 0x{0:08x}.'.format(data_offset))

  # TODO: implement.
  # def ScanBuffer(self, scan_state, data, data_size):
  #   """Scans a buffer.

  #   Args:
  #     scan_state: the scan state (instance of ScanState).
  #     data: a buffer containing raw data.
  #     data_size: the size of the raw data in the buffer.
  #   """
  #   # TODO: fix footer scanning logic.
  #   # need to know the file size here for the footers.

  #   # TODO: check for clashing ranges?

  #   header_range = self._GetHeaderRange()
  #   footer_range = self._GetFooterRange(scan_state.total_data_size)

  #   if self._scan_tree == self._header_scan_tree:
  #     if (scan_state.total_data_offset >= header_range.start_offset and
  #         scan_state.total_data_offset < header_range.end_offset):
  #       self._ScanBufferScanState(
  #           self._scan_tree, scan_state, data, data_size,
  #           scan_state.total_data_offset,
  #           total_data_size=scan_state.total_data_size)

  #     elif scan_state.total_data_offset > header_range.end_offset:
  #       # TODO: implement.
  #       pass

  #   if self._scan_tree == self._footer_scan_tree:
  #     if (scan_state.total_data_offset >= footer_range.start_offset and
  #         scan_state.total_data_offset < footer_range.end_offset):
  #       self._ScanBufferScanState(
  #           self._scan_tree, scan_state, data, data_size,
  #           scan_state.total_data_offset,
  #           total_data_size=scan_state.total_data_size)

  def ScanFileObject(self, file_object):
    """Scans a file-like object.

    Scans the header range first and then, if footer signatures exist,
    the footer range.

    Args:
      file_object: a file-like object.

    Returns:
      A list of scan results (instances of _ScanResult).
    """
    # TODO: add support for fixed size block-based reads.

    # Prefer the object's own size accessor; otherwise derive the size by
    # seeking to the end.
    if hasattr(file_object, 'get_size'):
      file_size = file_object.get_size()
    else:
      file_object.seek(0, os.SEEK_END)
      file_size = file_object.tell()

    file_offset = 0
    scan_state = self.StartScan(total_data_size=file_size)

    if self._header_scan_tree.root_node is not None:
      header_range = self._GetHeaderRange()

      # TODO: optimize the read by supporting fixed size block-based reads.
      # if file_offset < header_range.start_offset:
      #   file_offset = header_range.start_offset

      file_object.seek(file_offset, os.SEEK_SET)

      # TODO: optimize the read by supporting fixed size block-based reads.
      # data = file_object.read(header_range.size)
      data = file_object.read(header_range.end_offset)
      data_size = len(data)

      if data_size > 0:
        self._ScanBufferScanState(
            self._scan_tree, scan_state, data, data_size, file_offset,
            total_data_size=file_size)

      file_offset += data_size

      # Switch to the footer scan tree once the header scan is done.
      if self._footer_scan_tree.root_node is not None:
        self.StopScan(scan_state)

        self._scan_tree = self._footer_scan_tree
        scan_state.Reset(self._scan_tree.root_node)

    if self._footer_scan_tree.root_node is not None:
      footer_range = self._GetFooterRange(file_size)

      # Note that the offset in the footer scan tree start with 0. Make sure
      # the data offset of the data being scanned is aligned with the offset
      # in the scan tree.
      if footer_range.start_offset < self._footer_spanning_range.end_offset:
        data_offset = (
            self._footer_spanning_range.end_offset - footer_range.start_offset)
      else:
        data_offset = 0

      if file_offset < footer_range.start_offset:
        file_offset = footer_range.start_offset

      file_object.seek(file_offset, os.SEEK_SET)

      data = file_object.read(self._READ_BUFFER_SIZE)
      data_size = len(data)

      if data_size > 0:
        self._ScanBufferScanState(
            self._scan_tree, scan_state, data[data_offset:],
            data_size - data_offset, file_offset + data_offset,
            total_data_size=file_size)

    self.StopScan(scan_state)

    return self.GetScanResults(scan_state)

  def StartScan(self, total_data_size=None):
    """Starts a scan.

    The function sets up the scanning related structures if necessary.

    Args:
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Returns:
      A scan state (instance of ScanState).

    Raises:
      RuntimeError: when total data size is invalid.
    """
    # Note: unlike Scanner.StartScan() a total data size is required here
    # since footer offsets are relative to the end of the data.
    if total_data_size is None or total_data_size < 0:
      raise RuntimeError(u'Invalid total data size.')

    if self._header_scan_tree is None:
      self._header_scan_tree = scan_tree.ScanTree(
          self._specification_store, True,
          offset_mode=scan_tree.ScanTree.OFFSET_MODE_POSITIVE)

    if self._header_spanning_range is None:
      spanning_range = self._header_scan_tree.range_list.GetSpanningRange()
      self._header_spanning_range = spanning_range

    if self._footer_scan_tree is None:
      self._footer_scan_tree = scan_tree.ScanTree(
          self._specification_store, True,
          offset_mode=scan_tree.ScanTree.OFFSET_MODE_NEGATIVE)

    if self._footer_spanning_range is None:
      spanning_range = self._footer_scan_tree.range_list.GetSpanningRange()
      self._footer_spanning_range = spanning_range

    # Start with the header scan tree; fall back to the footer scan tree
    # when there are no header signatures.
    if self._header_scan_tree.root_node is not None:
      self._scan_tree = self._header_scan_tree
    elif self._footer_scan_tree.root_node is not None:
      self._scan_tree = self._footer_scan_tree
    else:
      self._scan_tree = None

    if self._scan_tree is not None:
      root_node = self._scan_tree.root_node
    else:
      root_node = None

    return ScanState(root_node, total_data_size=total_data_size)

  def StopScan(self, scan_state):
    """Stops a scan and flushes any remaining buffered data.

    Args:
      scan_state: the scan state (instance of ScanState).
    """
    self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
    # Clear the active scan tree so a subsequent scan selects it anew.
    self._scan_tree = None
|
||||
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""This file contains tests for the format scanner classes."""
|
||||
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import scanner
|
||||
from plaso.classifier import test_lib
|
||||
|
||||
|
||||
class ScannerTest(unittest.TestCase):
  """Class to test the scanner."""

  def _GetNumberOfScanMatches(
      self, test_scanner, data_buffers, total_data_size=None):
    """Scans the data buffers in sequence and returns the match count.

    Args:
      test_scanner: the scanner (instance of Scanner) under test.
      data_buffers: a list of data buffers to scan in sequence.
      total_data_size: optional value to indicate the total data size.
                       The default is None (unknown size).

    Returns:
      The number of scan matches.
    """
    scan_state = test_scanner.StartScan(total_data_size=total_data_size)
    for data in data_buffers:
      test_scanner.ScanBuffer(scan_state, data, len(data))
    test_scanner.StopScan(scan_state)
    return len(scan_state.GetMatches())

  def testInitialize(self):
    """Function to test the initialize function."""
    store = test_lib.CreateSpecificationStore()

    # Signature for LNK
    data1 = ('\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
             '\x00\x00\x00\x46')

    # Signature for REGF
    data2 = 'regf'

    # Random data
    data3 = '\x01\xfa\xe0\xbe\x99\x8e\xdb\x70\xea\xcc\x6b\xae\x2f\xf5\xa2\xe4'

    # Boundary scan test
    data4a = ('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
              '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00PK')
    data4b = ('\x07\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
              '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Z')

    # Large buffer test
    data5_size = 1024 * 1024
    data5 = '\x00' * (data5_size - 4)
    data5 += 'PK\x07\x08'

    test_scanner = scanner.Scanner(store)

    # A buffer containing a signature should yield a single match, both
    # with and without a known total data size.
    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data1], total_data_size=len(data1)), 1)
    self.assertEqual(self._GetNumberOfScanMatches(test_scanner, [data1]), 1)

    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data2], total_data_size=len(data2)), 1)
    self.assertEqual(self._GetNumberOfScanMatches(test_scanner, [data2]), 1)

    # Random data should yield no matches.
    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data3], total_data_size=len(data3)), 0)
    self.assertEqual(self._GetNumberOfScanMatches(test_scanner, [data3]), 0)

    # A signature split across a buffer boundary should still match.
    total_data_size = len(data4a) + len(data4b)
    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data4a, data4b],
            total_data_size=total_data_size), 1)
    self.assertEqual(
        self._GetNumberOfScanMatches(test_scanner, [data4a, data4b]), 1)

    # A signature at the very end of a large buffer should match.
    self.assertEqual(
        self._GetNumberOfScanMatches(
            test_scanner, [data5], total_data_size=len(data5)), 1)
|
||||
|
||||
|
||||
# Run the format scanner tests when this module is invoked directly.
if __name__ == '__main__':
  unittest.main()
|
||||
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""The format specification classes."""
|
||||
|
||||
|
||||
class Signature(object):
  """Class that defines a signature of a format specification.

  The signature consists of a byte string expression, an optional
  offset relative to the start of the data, and a value to indicate
  if the expression is bound to the offset.
  """
  def __init__(self, expression, offset=None, is_bound=False):
    """Initializes the signature.

    Args:
      expression: string containing the expression of the signature.
                  The expression consists of a byte string at the moment
                  regular expression (regexp) are not supported.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative from the start of the data a negative offset
              is relative from the end of the data.
      is_bound: boolean value to indicate the signature must be bound to
                the offset or False by default.
    """
    # The attributes are read directly by the scanner; no accessors needed.
    self.expression = expression
    self.offset = offset
    self.is_bound = is_bound
|
||||
|
||||
|
||||
class Specification(object):
  """Class that contains a format specification."""

  def __init__(self, identifier):
    """Initializes the specification.

    Args:
      identifier: string containing a unique name for the format.
    """
    self.identifier = identifier
    # Optional metadata and signatures describing the format.
    self.mime_types = []
    self.signatures = []
    self.universal_type_identifiers = []

  def AddMimeType(self, mime_type):
    """Adds a MIME type."""
    self.mime_types.append(mime_type)

  def AddNewSignature(self, expression, offset=None, is_bound=False):
    """Adds a signature.

    Args:
      expression: string containing the expression of the signature.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative from the start of the data a negative offset
              is relative from the end of the data.
      is_bound: boolean value to indicate the signature must be bound to
                the offset or False by default.
    """
    signature = Signature(expression, offset=offset, is_bound=is_bound)
    self.signatures.append(signature)

  def AddUniversalTypeIdentifier(self, universal_type_identifiers):
    """Adds a Universal Type Identifier (UTI)."""
    self.universal_type_identifiers.append(universal_type_identifiers)
|
||||
|
||||
|
||||
class SpecificationStore(object):
  """Class that serves as a store for specifications.

  Specifications are keyed by their format identifier; identifiers
  must be unique within the store.
  """

  def __init__(self):
    """Initializes the specification store."""
    # Maps format identifier -> Specification.
    self._format_specifications = {}

  @property
  def specifications(self):
    """A specifications iterator object."""
    # iter(dict.values()) instead of dict.itervalues() so the property
    # returns an iterator on both Python 2 and Python 3.
    return iter(self._format_specifications.values())

  def AddNewSpecification(self, identifier):
    """Adds a new specification.

    Args:
      identifier: a string containing the format identifier,
                  which should be unique for the store.

    Returns:
      an instance of Specification.

    Raises:
      ValueError: if the store already contains a specification with
                  the same identifier.
    """
    if identifier in self._format_specifications:
      raise ValueError("specification {0:s} is already defined in "
                       "store.".format(identifier))

    self._format_specifications[identifier] = Specification(identifier)

    return self._format_specifications[identifier]

  def AddSpecification(self, specification):
    """Adds a specification.

    Args:
      specification: the specification (instance of Specification).

    Raises:
      KeyError: if the store already contains a specification with
                the same identifier.
    """
    if specification.identifier in self._format_specifications:
      raise KeyError(
          u'Specification {0:s} is already defined in store.'.format(
              specification.identifier))

    self._format_specifications[specification.identifier] = specification

  def ReadFromFileObject(self, unused_file_object):
    """Reads the specification store from a file-like object.

    Args:
      unused_file_object: A file-like object.

    Raises:
      RuntimeError: because functionality is not implemented yet.
    """
    # TODO: implement this function.
    raise RuntimeError(u'Function not implemented.')

  def ReadFromFile(self, filename):
    """Reads the specification store from a file.

    Args:
      filename: The name of the file.
    """
    # A context manager guarantees the file is closed even when
    # ReadFromFileObject raises.
    with open(filename, 'r') as file_object:
      self.ReadFromFileObject(file_object)
|
||||
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for the format specification classes."""
|
||||
|
||||
import unittest
|
||||
|
||||
from plaso.classifier import specification
|
||||
|
||||
|
||||
class SpecificationStoreTest(unittest.TestCase):
  """Class to test the specification store."""

  def testAddSpecification(self):
    """Function to test the add specification function."""
    store = specification.SpecificationStore()

    regf_specification = specification.Specification('REGF')
    regf_specification.AddNewSignature('regf', offset=0)
    store.AddSpecification(regf_specification)

    esedb_specification = specification.Specification('ESEDB')
    esedb_specification.AddNewSignature('\xef\xcd\xab\x89', offset=4)
    store.AddSpecification(esedb_specification)

    # Adding a specification with a duplicate identifier must fail.
    with self.assertRaises(KeyError):
      store.AddSpecification(regf_specification)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Run this module's test cases with the default unittest runner.
  unittest.main()
|
||||
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2013 The Plaso Project Authors.
|
||||
# Please see the AUTHORS file for details on individual authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Shared test cases."""
|
||||
|
||||
from plaso.classifier import specification
|
||||
|
||||
|
||||
def CreateSpecificationStore():
  """Creates a format specification store for testing purposes.

  Returns:
    A format specification store (instance of SpecificationStore).
  """
  store = specification.SpecificationStore()

  test_specification = store.AddNewSpecification('7zip')
  test_specification.AddMimeType('application/x-7z-compressed')
  test_specification.AddUniversalTypeIdentifier('org.7-zip.7-zip-archive')
  test_specification.AddNewSignature('7z\xbc\xaf\x27\x1c', offset=0)

  test_specification = store.AddNewSpecification('esedb')
  test_specification.AddNewSignature(
      '\xef\xcd\xab\x89', offset=4, is_bound=True)

  test_specification = store.AddNewSpecification('evt')
  test_specification.AddNewSignature(
      '\x30\x00\x00\x00LfLe\x01\x00\x00\x00\x01\x00\x00\x00', offset=0,
      is_bound=True)

  test_specification = store.AddNewSpecification('evtx')
  test_specification.AddNewSignature('ElfFile\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('ewf')
  test_specification.AddNewSignature(
      'EVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)

  # Bug fix: previously this specification was created as a stand-alone
  # specification.Specification('ewf_logical') and never added to the store,
  # so it was silently dropped; register it like the other formats.
  test_specification = store.AddNewSpecification('ewf_logical')
  test_specification.AddNewSignature(
      'LVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('lnk')
  test_specification.AddNewSignature(
      '\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
      '\x00\x00\x00\x46', offset=0)

  test_specification = store.AddNewSpecification('msiecf_index_dat')
  test_specification.AddNewSignature(
      'Client UrlCache MMF Ver ', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('nk2')
  test_specification.AddNewSignature(
      '\x0d\xf0\xad\xba\xa0\x00\x00\x00\x01\x00\x00\x00', offset=0,
      is_bound=True)

  test_specification = store.AddNewSpecification('olecf')
  test_specification.AddNewSignature(
      '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1', offset=0, is_bound=True)
  test_specification.AddNewSignature(
      '\x0e\x11\xfc\x0d\xd0\xcf\x11\x0e', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('pff')
  test_specification.AddNewSignature('!BDN', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('qcow')
  test_specification.AddNewSignature('QFI\xfb', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('rar')
  test_specification.AddMimeType('application/x-rar-compressed')
  test_specification.AddUniversalTypeIdentifier('com.rarlab.rar-archive')
  test_specification.AddNewSignature(
      'Rar!\x1a\x07\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('regf')
  test_specification.AddNewSignature('regf', offset=0, is_bound=True)

  # NOTE(review): 'thumbache' looks like a typo for 'thumbcache', but the
  # identifiers are kept as-is since callers may depend on them — confirm
  # before renaming.
  test_specification = store.AddNewSpecification('thumbache_db_cache')
  test_specification.AddNewSignature('CMMM', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('thumbache_db_index')
  test_specification.AddNewSignature('IMMM', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('zip')
  test_specification.AddMimeType('application/zip')
  test_specification.AddUniversalTypeIdentifier('com.pkware.zip-archive')
  # WinZip 8 signature.
  test_specification.AddNewSignature('PK00', offset=0, is_bound=True)
  test_specification.AddNewSignature('PK\x01\x02')
  test_specification.AddNewSignature('PK\x03\x04', offset=0)
  test_specification.AddNewSignature('PK\x05\x05')
  # Will be at offset 0 when the archive is empty.
  test_specification.AddNewSignature('PK\x05\x06', offset=-22, is_bound=True)
  test_specification.AddNewSignature('PK\x06\x06')
  test_specification.AddNewSignature('PK\x06\x07')
  test_specification.AddNewSignature('PK\x06\x08')
  # Will be at offset 0 when this is spanned archive.
  test_specification.AddNewSignature('PK\x07\x08')

  return store
|
||||
Reference in New Issue
Block a user