Import from old repository

This commit was authored by Stefan on 2020-04-06 at 18:48:34 (+02:00).
commit 0da6783a45
762 changed files with 103065 additions and 0 deletions
+16
View File
@@ -0,0 +1,16 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+184
View File
@@ -0,0 +1,184 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the format classifier classes.
Plaso is a tool that extracts events from files on a file system.
For this it either reads files from a mounted file system or from an image.
It uses an exhaustive approach to determine parse events from a file, meaning
that it passes the file first to parser A and if that fails it continues with
parser B.
The classifier is designed to be able to more quickly determine the format of
a file and limit the number of parsers part of the exhaustive approach.
The current version of the classifier uses signatures to identify file formats.
Some signatures must always be defined at a specific offset, this is referred to
as an offset-bound signature or bound for short. Other signatures are commonly
found at a specific offset but not necessarily. The last form of signatures is
unbound, meaning that they don't have a fixed or common location where they can
be found.
A specification is a collection of signatures with additional metadata that
defines a specific file format. These specifications are grouped into a store
for ease of use, e.g. so that they can be read from a configuration file all
at once.
The classifier requires a scanner to analyze the data in a file. The scanner
uses the specifications in a store to scan for the signatures or a certain
format.
The classifier allows for multiple methods of scanning a file:
* full: the entire file is scanned. This is the default scanning method.
* head-tail: only the beginning (head) and the end (tail) of the file is
scanned. This approach is more efficient for larger files.
The buffer size is used as the size of the data that is scanned.
Smaller files are scanned entirely.
The classifier returns zero or more classifications which point to a format
specification and the scan results for the signatures defined by
the specification.
"""
import logging
class Classification(object):
  """A format classification.

  A classification ties a format specification to the scan matches that
  were found for the signatures the specification defines.
  """

  def __init__(self, specification, scan_matches):
    """Initializes the classification.

    Args:
      specification: the format specification (instance of Specification).
      scan_matches: the list of scan matches (instances of _ScanMatch).
    """
    super(Classification, self).__init__()
    self._specification = specification
    self.scan_matches = scan_matches

  @property
  def identifier(self):
    """The identifier of the format specification."""
    return self._specification.identifier

  @property
  def magic_types(self):
    """The magic types of the specification or an empty list if none."""
    return self._specification.magic_types

  @property
  def mime_types(self):
    """The mime types of the specification or an empty list if none."""
    return self._specification.mime_types
class Classifier(object):
  """Class for classifying formats in raw data.

  The classifier is initialized with one or more specifications.
  After which it can be used to classify data in files or file-like objects.

  The actual scanning of the data is done by the scanner, these are separate
  to allow for the scanner to easily be replaced for a more efficient
  alternative if necessary.

  For an example of how the classifier is to be used see: classify.py.
  """

  # Size of the data buffer used while scanning; per the module docstring
  # smaller files are scanned entirely.
  BUFFER_SIZE = 16 * 1024 * 1024

  def __init__(self, scanner):
    """Initializes the classifier and its scanning related structures.

    Args:
      scanner: an instance of the signature scanner.
    """
    super(Classifier, self).__init__()
    self._scanner = scanner

  def _GetClassifications(self, scan_results):
    """Retrieves the classifications based on the scan results.

    Multiple scan results for the same specification are combined into a
    single classification.

    Args:
      scan_results: a list containing instances of _ScanResult.

    Returns:
      a list of instances of Classification.
    """
    classifications_per_identifier = {}
    for scan_result in scan_results:
      identifier = scan_result.identifier

      for scan_match in scan_result.scan_matches:
        logging.debug(
            u'scan match at offset: 0x{0:08x} specification: {1:s}'.format(
                scan_match.total_data_offset, identifier))

      # Only the first scan result per specification creates a
      # classification; it already carries all the scan matches.
      if identifier not in classifications_per_identifier:
        classifications_per_identifier[identifier] = Classification(
            scan_result.specification, scan_result.scan_matches)

    return classifications_per_identifier.values()

  def ClassifyBuffer(self, data, data_size):
    """Classifies the data in a buffer, assumes all necessary data is available.

    Args:
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.

    Returns:
      a list of classifications or an empty list.
    """
    scan_state = self._scanner.StartScan()
    self._scanner.ScanBuffer(scan_state, data, data_size)
    self._scanner.StopScan(scan_state)

    return self._GetClassifications(scan_state.GetResults())

  def ClassifyFileObject(self, file_object):
    """Classifies the data in a file-like object.

    Args:
      file_object: a file-like object.

    Returns:
      a list of classifier classifications or an empty list.
    """
    return self._GetClassifications(
        self._scanner.ScanFileObject(file_object))

  def ClassifyFile(self, filename):
    """Classifies the data in a file.

    Args:
      filename: the name of the file.

    Returns:
      a list of classifier classifications or an empty list.
    """
    with open(filename, 'rb') as file_object:
      return self.ClassifyFileObject(file_object)
+72
View File
@@ -0,0 +1,72 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the format classifier classes."""
import os
import unittest
from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib
class ClassifierTest(unittest.TestCase):
  """Class to test Classifier."""

  def setUp(self):
    """Sets up the specification store and the test file paths."""
    self._store = test_lib.CreateSpecificationStore()
    self._test_file1 = os.path.join('test_data', 'NTUSER.DAT')
    self._test_file2 = os.path.join('test_data', 'syslog.zip')

  def testClassifyFileWithScanner(self):
    """Tests the ClassifyFile function with the scan tree-based scanner."""
    test_scanner = scanner.Scanner(self._store)

    for test_path in [self._test_file1, self._test_file2]:
      test_classifier = classifier.Classifier(test_scanner)
      classifications = test_classifier.ClassifyFile(test_path)
      self.assertEqual(len(classifications), 1)

      # TODO: assert the contents of the classification.

  def testClassifyFileWithOffsetBoundScanner(self):
    """Tests the ClassifyFile function with the offset-bound scanner."""
    test_scanner = scanner.OffsetBoundScanner(self._store)

    for test_path in [self._test_file1, self._test_file2]:
      test_classifier = classifier.Classifier(test_scanner)
      classifications = test_classifier.ClassifyFile(test_path)
      self.assertEqual(len(classifications), 1)

      # TODO: assert the contents of the classification.
# Run the tests when the module is invoked directly.
if __name__ == "__main__":
  unittest.main()
+78
View File
@@ -0,0 +1,78 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a small classify test program."""
import argparse
import glob
import logging
from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib
def Main():
  """Entry point of the classify test program.

  Parses the command line arguments, expands the input glob patterns,
  builds a classifier from the test specification store and prints the
  classifications found for each input file.
  """
  args_parser = argparse.ArgumentParser(
      description='Classify test program.')

  # Note: argparse requires "type" to be a callable and also applies it to
  # string default values, so the previous optparse-style type='choice'
  # made parse_args() fail even without a -t argument. The "choices"
  # argument below already restricts the accepted values.
  args_parser.add_argument(
      '-t', '--type', metavar='TYPE', action='store',
      dest='scanner_type', choices=['scan-tree', 'scan_tree'],
      default='scan-tree', help='The scanner type')

  args_parser.add_argument(
      '-v', '--verbose', action='store_true', dest='verbose', default=False,
      help='Print verbose output')

  args_parser.add_argument(
      'filenames', nargs='+', action='store', metavar='FILENAMES',
      default=None, help='The input filename(s) to classify.')

  options = args_parser.parse_args()

  if options.verbose:
    logging.basicConfig(level=logging.DEBUG)

  # Expand glob patterns so the program also works on shells that do not
  # expand them.
  files_to_classify = []
  for input_glob in options.filenames:
    files_to_classify += glob.glob(input_glob)

  store = test_lib.CreateSpecificationStore()

  # "choices" already enforces valid values; this check remains as a
  # safeguard in case the choices list and the supported scanners diverge.
  if options.scanner_type not in ['scan-tree', 'scan_tree']:
    print(u'Unsupported scanner type defaulting to: scan-tree')

  scan = scanner.Scanner(store)
  classify = classifier.Classifier(scan)

  for input_filename in files_to_classify:
    classifications = classify.ClassifyFile(input_filename)

    print(u'File: {0:s}'.format(input_filename))
    if not classifications:
      print(u'No classifications found.')
    else:
      print(u'Classifications:')
      for classification in classifications:
        print(u'\tformat: {0:s}'.format(classification.identifier))

    print(u'')
# Script entry point.
if __name__ == '__main__':
  Main()
+308
View File
@@ -0,0 +1,308 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The patterns classes used by the scan tree-based format scanner."""
class _ByteValuePatterns(object):
"""Class that implements a mapping between byte value and patterns.
The byte value patterns are used in the scan tree-based format scanner
to map a byte value to one or more patterns.
"""
def __init__(self, byte_value):
"""Initializes the pattern table (entry) byte value.
Args:
byte_value: the byte value that maps the patterns in the table.
"""
super(_ByteValuePatterns, self).__init__()
self.byte_value = byte_value
self.patterns = {}
def __unicode__(self):
"""Retrieves a string representation of the byte value patterns."""
return u'0x{0:02x} {1!s}'.format(ord(self.byte_value), self.patterns)
def AddPattern(self, pattern):
"""Adds a pattern.
Args:
pattern: the pattern (instance of Pattern).
Raises:
ValueError: if the table entry already contains a pattern
with the same identifier.
"""
if pattern.identifier in self.patterns:
raise ValueError(u'Pattern {0:s} is already defined.'.format(
pattern.identifier))
self.patterns[pattern.identifier] = pattern
def ToDebugString(self, indentation_level=1):
"""Converts the byte value pattern into a debug string."""
indentation = u' ' * indentation_level
header = u'{0:s}byte value: 0x{1:02x}\n'.format(
indentation, ord(self.byte_value))
entries = u''.join([u'{0:s} patterns: {1:s}\n'.format(
indentation, identifier) for identifier in self.patterns])
return u''.join([header, entries, u'\n'])
class _SkipTable(object):
"""Class that implements a skip table.
The skip table is used in the scan tree-based format scanner to determine
the skip value for the BoyerMooreHorspool search.
"""
def __init__(self, skip_pattern_length):
"""Initializes the skip table.
Args:
skip_pattern_length: the (maximum) skip pattern length.
"""
super(_SkipTable, self).__init__()
self._skip_value_per_byte_value = {}
self.skip_pattern_length = skip_pattern_length
def __getitem__(self, key):
"""Retrieves a specific skip value.
Args:
key: the byte value within the skip table.
Returns:
the skip value for the key or the maximim skip value
if no corresponding key was found.
"""
if key in self._skip_value_per_byte_value:
return self._skip_value_per_byte_value[key]
return self.skip_pattern_length
def SetSkipValue(self, byte_value, skip_value):
"""Sets a skip value.
Args:
byte_value: the corresponding byte value.
skip_value: the number of bytes to skip.
Raises:
ValueError: if byte value or skip value is out of bounds.
"""
if byte_value < 0 or byte_value > 255:
raise ValueError(u'Invalid byte value, value out of bounds.')
if skip_value < 0 or skip_value >= self.skip_pattern_length:
raise ValueError(u'Invalid skip value, value out of bounds.')
if (not byte_value in self._skip_value_per_byte_value or
self._skip_value_per_byte_value[byte_value] > skip_value):
self._skip_value_per_byte_value[byte_value] = skip_value
def ToDebugString(self):
"""Converts the skip table into a debug string."""
header = u'Byte value\tSkip value\n'
entries = u''.join([u'0x{0:02x}\t{1:d}\n'.format(
byte_value, self._skip_value_per_byte_value[byte_value])
for byte_value in self._skip_value_per_byte_value])
default = u'Default\t{0:d}\n'.format(self.skip_pattern_length)
return u''.join([header, entries, default, u'\n'])
class Pattern(object):
  """A signature pattern within a format specification."""

  def __init__(self, signature_index, signature, specification):
    """Initializes the pattern.

    Args:
      signature_index: the index of the signature within the specification.
      signature: the signature (instance of Signature).
      specification: the specification (instance of Specification) that
                     contains the signature.
    """
    super(Pattern, self).__init__()
    self._signature_index = signature_index
    self.signature = signature
    self.specification = specification

  def __unicode__(self):
    """Retrieves a string representation."""
    return self.identifier

  @property
  def expression(self):
    """The expression of the signature."""
    return self.signature.expression

  @property
  def identifier(self):
    """The identifier of the pattern.

    The identifier is the specification identifier with the signature index
    appended, joined by an underscore because some scanner implementations
    are limited in what characters identifiers may contain.
    """
    return u'{0:s}_{1:d}'.format(
        self.specification.identifier, self._signature_index)

  @property
  def is_bound(self):
    """Boolean value to indicate the signature is bound to an offset."""
    return self.signature.is_bound

  @property
  def offset(self):
    """The offset of the signature."""
    return self.signature.offset
class PatternTable(object):
  """Class that implements a pattern table.

  The pattern table is used in the scan tree-based format scanner to
  construct a scan tree. It contains either unbound patterns or patterns
  bound to a specific offset.
  """

  def __init__(self, patterns, ignore_list, is_bound=None):
    """Initializes and builds the patterns table from patterns.

    Args:
      patterns: a list of the patterns.
      ignore_list: a list of pattern offsets to ignore.
      is_bound: optional boolean value to indicate if the signatures are bound
                to offsets. The default is None, which means the value should
                be ignored and both bound and unbound patterns are considered
                unbound.

    Raises:
      ValueError: if a signature pattern is too small to be useful (< 4).
    """
    super(PatternTable, self).__init__()
    self._byte_values_per_offset = {}
    self.largest_pattern_length = 0
    self.largest_pattern_offset = 0
    self.patterns = []
    self.smallest_pattern_length = 0
    self.smallest_pattern_offset = 0

    for pattern in patterns:
      if is_bound is not None and pattern.signature.is_bound != is_bound:
        continue

      pattern_length = len(pattern.expression)

      if pattern_length < 4:
        raise ValueError(u'Pattern too small to be useful.')

      # The length attributes start out as 0, which would always win a
      # min() comparison, so the smallest length is seeded from the first
      # accepted pattern and only compared against subsequent ones.
      if not self.patterns:
        self.smallest_pattern_length = pattern_length
      else:
        self.smallest_pattern_length = min(
            self.smallest_pattern_length, pattern_length)
      self.largest_pattern_length = max(
          self.largest_pattern_length, pattern_length)

      self.patterns.append(pattern)
      self._AddPattern(pattern, ignore_list, is_bound)

  def _AddPattern(self, pattern, ignore_list, is_bound):
    """Adds the byte values per offset in the pattern to the table.

    Args:
      pattern: the pattern (instance of Pattern).
      ignore_list: a list of pattern offsets to ignore.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.
    """
    pattern_offset = pattern.offset if is_bound else 0

    # __init__ appends the pattern to self.patterns before invoking this
    # method, so a single entry means this is the first pattern. The offset
    # bounds are seeded from it instead of being compared against the
    # zero-initialized attributes, which would incorrectly pin the smallest
    # offset to 0.
    if len(self.patterns) == 1:
      self.smallest_pattern_offset = pattern_offset
      self.largest_pattern_offset = pattern_offset
    else:
      self.smallest_pattern_offset = min(
          self.smallest_pattern_offset, pattern_offset)
      self.largest_pattern_offset = max(
          self.largest_pattern_offset, pattern_offset)

    for byte_value in pattern.expression:
      if pattern_offset not in self._byte_values_per_offset:
        self._byte_values_per_offset[pattern_offset] = {}

      if pattern_offset not in ignore_list:
        byte_values = self._byte_values_per_offset[pattern_offset]

        if byte_value not in byte_values:
          byte_values[byte_value] = _ByteValuePatterns(byte_value)

        byte_value_patterns = byte_values[byte_value]
        byte_value_patterns.AddPattern(pattern)

      pattern_offset += 1

  @property
  def offsets(self):
    """The offsets."""
    return self._byte_values_per_offset.keys()

  def GetByteValues(self, pattern_offset):
    """Returns the bytes values for a specific pattern offset."""
    return self._byte_values_per_offset[pattern_offset]

  def GetSkipTable(self):
    """Retrieves the skip table for the patterns in the table.

    Returns:
      The skip table (instance of _SkipTable).
    """
    skip_table = _SkipTable(self.smallest_pattern_length)
    for pattern in self.patterns:
      if pattern.expression:
        skip_value = self.smallest_pattern_length
        for expression_index in range(0, self.smallest_pattern_length):
          skip_value -= 1
          skip_table.SetSkipValue(
              ord(pattern.expression[expression_index]), skip_value)
    return skip_table

  def ToDebugString(self):
    """Converts the pattern table into a debug string."""
    header = u'Pattern offset\tByte value(s)\n'
    entries = u''
    for pattern_offset in self._byte_values_per_offset:
      entries += u'{0:d}'.format(pattern_offset)
      byte_values = self._byte_values_per_offset[pattern_offset]
      for byte_value in byte_values:
        identifiers = u', '.join(
            [identifier for identifier in byte_values[byte_value].patterns])
        entries += u'\t0x{0:02x} ({1:s})'.format(ord(byte_value), identifiers)
      entries += u'\n'
    return u''.join([header, entries, u'\n'])
+156
View File
@@ -0,0 +1,156 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The range list data type."""
class Range(object):
  """A contiguous byte range defined by an offset and a size."""

  def __init__(self, range_offset, range_size):
    """Initializes the range object.

    Args:
      range_offset: the range offset.
      range_size: the range size.

    Raises:
      ValueError: if the range offset or range size is not valid.
    """
    if range_offset < 0:
      raise ValueError(u'Invalid range offset value.')
    if range_size < 0:
      raise ValueError(u'Invalid range size value.')

    super(Range, self).__init__()
    # The end offset is precomputed for convenience; it is the first
    # offset no longer part of the range.
    self.end_offset = range_offset + range_size
    self.size = range_size
    self.start_offset = range_offset
class RangeList(object):
  """Class that implements a range list object.

  The ranges in the list are kept ordered by start offset; an inserted
  range that overlaps an existing one is merged into it.
  """

  def __init__(self):
    """Initializes the range list object."""
    super(RangeList, self).__init__()
    # List of Range objects ordered by start offset.
    self.ranges = []

  @property
  def number_of_ranges(self):
    """The number of ranges."""
    return len(self.ranges)

  def GetSpanningRange(self):
    """Retrieves the range spanning the entire range list.

    Returns:
      a Range from the start of the first range up to the end of the last
      range, or None if the list is empty.
    """
    if self.number_of_ranges == 0:
      return
    first_range = self.ranges[0]
    last_range = self.ranges[-1]
    range_size = last_range.end_offset - first_range.start_offset
    return Range(first_range.start_offset, range_size)

  def Insert(self, range_offset, range_size):
    """Inserts the range defined by the offset and size in the list.

    Note that overlapping ranges will be merged.

    Args:
      range_offset: the range offset.
      range_size: the range size.

    Raises:
      RuntimeError: if the range cannot be inserted.
      ValueError: if the range offset or range size is not valid.
    """
    if range_offset < 0:
      raise ValueError(u'Invalid range offset value.')
    if range_size < 0:
      raise ValueError(u'Invalid range size value.')

    # Exactly one of these is determined by the scan below: the position
    # to insert a new Range at, or the index of an existing Range to merge
    # the new range into.
    insert_index = None
    merge_index = None
    number_of_range_objects = len(self.ranges)
    range_end_offset = range_offset + range_size

    if number_of_range_objects == 0:
      insert_index = 0
    else:
      range_object_index = 0
      for range_object in self.ranges:
        # Ignore negative ranges.
        if range_object.start_offset < 0:
          range_object_index += 1
          continue

        # Insert the range before an existing one.
        if range_end_offset < range_object.start_offset:
          insert_index = range_object_index
          break

        # Ignore the range since the existing one overlaps it.
        if (range_offset >= range_object.start_offset and
            range_end_offset <= range_object.end_offset):
          break

        # Merge the range since it overlaps the existing one at the end.
        if (range_offset >= range_object.start_offset and
            range_offset <= range_object.end_offset):
          merge_index = range_object_index
          break

        # Merge the range since it overlaps the existing one at the start.
        if (range_end_offset >= range_object.start_offset and
            range_end_offset <= range_object.end_offset):
          merge_index = range_object_index
          break

        # Merge the range since it overlaps the existing one.
        if (range_offset <= range_object.start_offset and
            range_end_offset >= range_object.end_offset):
          merge_index = range_object_index
          break

        range_object_index += 1

      # Insert the range after the last one.
      if range_object_index >= number_of_range_objects:
        insert_index = number_of_range_objects

    if insert_index is not None and merge_index is not None:
      raise RuntimeError(
          u'Unable to insert the range both insert and merge specified.')

    if insert_index is not None:
      self.ranges.insert(insert_index, Range(range_offset, range_size))

    elif merge_index is not None:
      range_object = self.ranges[merge_index]

      # Grow the existing range at the front when the new range starts
      # before it.
      if range_offset < range_object.start_offset:
        range_object.size += range_object.start_offset - range_offset
        range_object.start_offset = range_offset

      # Grow the existing range at the back when the new range ends
      # after it.
      if range_end_offset > range_object.end_offset:
        range_object.size += range_end_offset - range_object.end_offset
        range_object.end_offset = range_end_offset
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the range list."""
import unittest
from plaso.classifier import range_list
class RangeListTest(unittest.TestCase):
  """Class to test the range list."""

  # Note: the deprecated assertEquals alias was replaced by assertEqual;
  # the alias was removed in Python 3.12.

  def testInsertPositiveRanges(self):
    """Function to test the insert function using positive ranges."""
    range_list_object = range_list.RangeList()

    # Test non-overlapping range.
    range_list_object.Insert(500, 100)
    self.assertEqual(range_list_object.number_of_ranges, 1)

    range_object = range_list_object.ranges[0]
    self.assertEqual(range_object.start_offset, 500)
    self.assertEqual(range_object.end_offset, 600)
    self.assertEqual(range_object.size, 100)

    # Test non-overlapping range.
    range_list_object.Insert(2000, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 2000)
    self.assertEqual(range_object.end_offset, 2100)
    self.assertEqual(range_object.size, 100)

    # Test range that overlaps with an existing range at the start.
    range_list_object.Insert(1950, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1950)
    self.assertEqual(range_object.end_offset, 2100)
    self.assertEqual(range_object.size, 150)

    # Test range that overlaps with an existing range at the end.
    range_list_object.Insert(2050, 100)
    self.assertEqual(range_list_object.number_of_ranges, 2)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1950)
    self.assertEqual(range_object.end_offset, 2150)
    self.assertEqual(range_object.size, 200)

    # Test non-overlapping range.
    range_list_object.Insert(1000, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1000)
    self.assertEqual(range_object.end_offset, 1100)
    self.assertEqual(range_object.size, 100)

    # Test range that aligns with an existing range at the end.
    range_list_object.Insert(1100, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 1000)
    self.assertEqual(range_object.end_offset, 1200)
    self.assertEqual(range_object.size, 200)

    # Test range that aligns with an existing range at the start.
    range_list_object.Insert(900, 100)
    self.assertEqual(range_list_object.number_of_ranges, 3)

    range_object = range_list_object.ranges[1]
    self.assertEqual(range_object.start_offset, 900)
    self.assertEqual(range_object.end_offset, 1200)
    self.assertEqual(range_object.size, 300)

    # Test non-overlapping range.
    range_list_object.Insert(0, 100)
    self.assertEqual(range_list_object.number_of_ranges, 4)

    range_object = range_list_object.ranges[0]
    self.assertEqual(range_object.start_offset, 0)
    self.assertEqual(range_object.end_offset, 100)
    self.assertEqual(range_object.size, 100)

    # Test invalid ranges.
    with self.assertRaises(ValueError):
      range_list_object.Insert(-1, 100)

    with self.assertRaises(ValueError):
      range_list_object.Insert(3000, -100)
# Run the tests when the module is invoked directly.
if __name__ == '__main__':
  unittest.main()
+744
View File
@@ -0,0 +1,744 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The scan tree classes used by the scan tree-based format scanner."""
import logging
from plaso.classifier import patterns
from plaso.classifier import range_list
class _PatternWeights(object):
"""Class that implements pattern weights."""
def __init__(self):
"""Initializes the pattern weights."""
super(_PatternWeights, self).__init__()
self._offsets_per_weight = {}
self._weight_per_offset = {}
def AddOffset(self, pattern_offset):
"""Adds a pattern offset and sets its weight to 0.
Args:
pattern_offset: the pattern offset to add to the pattern weights.
Raises:
ValueError: if the pattern weights already contains the pattern offset.
"""
if pattern_offset in self._weight_per_offset:
raise ValueError(u'Pattern offset already set.')
self._weight_per_offset[pattern_offset] = 0
def AddWeight(self, pattern_offset, weight):
"""Adds a weight for a specific pattern offset.
Args:
pattern_offset: the pattern offset to add to the pattern weights.
weight: the corresponding weight to add.
Raises:
ValueError: if the pattern weights does not contain the pattern offset.
"""
if pattern_offset not in self._weight_per_offset:
raise ValueError(u'Pattern offset not set.')
self._weight_per_offset[pattern_offset] += weight
if weight not in self._offsets_per_weight:
self._offsets_per_weight[weight] = []
self._offsets_per_weight[weight].append(pattern_offset)
def GetLargestWeight(self):
"""Retrieves the largest weight or 0 if none."""
if self._offsets_per_weight:
return max(self._offsets_per_weight)
return 0
def GetOffsetsForWeight(self, weight):
"""Retrieves the list of offsets for a specific weight."""
return self._offsets_per_weight[weight]
def GetWeightForOffset(self, pattern_offset):
"""Retrieves the weight for a specific pattern offset."""
return self._weight_per_offset[pattern_offset]
def ToDebugString(self):
"""Converts the pattern weights into a debug string."""
header1 = u'Pattern offset\tWeight\n'
entries1 = u''.join([u'{0:d}\t{1:d}\n'.format(
pattern_offset, self._weight_per_offset[pattern_offset])
for pattern_offset in self._weight_per_offset])
header2 = u'Weight\tPattern offset(s)\n'
entries2 = u''.join([u'{0:d}\t{1!s}\n'.format(
weight, self._offsets_per_weight[weight])
for weight in self._offsets_per_weight])
return u''.join([header1, entries1, u'\n', header2, entries2, u'\n'])
def SetWeight(self, pattern_offset, weight):
"""Sets a weight for a specific pattern offset.
Args:
pattern_offset: the pattern offset to set in the pattern weights.
weight: the corresponding weight to set.
Raises:
ValueError: if the pattern weights does not contain the pattern offset.
"""
if pattern_offset not in self._weight_per_offset:
raise ValueError(u'Pattern offset not set.')
self._weight_per_offset[pattern_offset] = weight
if weight not in self._offsets_per_weight:
self._offsets_per_weight[weight] = []
self._offsets_per_weight[weight].append(pattern_offset)
class ScanTree(object):
  """Class that implements a scan tree.

  The scan tree is a decision tree that, for every remaining set of patterns,
  selects the most distinguishing pattern offset and branches on the byte
  value found at that offset.
  """

  # Byte values that occur frequently in files and therefore carry little
  # distinguishing information for a signature.
  _COMMON_BYTE_VALUES = frozenset(
      '\x00\x01\xff\t\n\r 0123456789'
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
      'abcdefghijklmnopqrstuvwxyz')

  # The offset must be positive, negative offsets are ignored.
  OFFSET_MODE_POSITIVE = 1

  # The offset must be negative, positive offsets are ignored.
  OFFSET_MODE_NEGATIVE = 2

  # The offset must be positive, an error is raised for negative offsets.
  OFFSET_MODE_POSITIVE_STRICT = 3

  # The offset must be negative, an error is raised for positive offsets.
  OFFSET_MODE_NEGATIVE_STRICT = 4

  def __init__(
      self, specification_store, is_bound,
      offset_mode=OFFSET_MODE_POSITIVE_STRICT):
    """Initializes and builds the scan tree.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.
      offset_mode: optional value to indicate how the signature offsets should
                   be handled. The default is that the offset must be positive
                   and an error is raised for negative offsets.
    """
    super(ScanTree, self).__init__()
    self.largest_length = 0
    self.pattern_list = []
    self.range_list = range_list.RangeList()
    self.root_node = None
    self.skip_table = None

    # First determine all the patterns from the specification store.
    self._BuildPatterns(specification_store, is_bound, offset_mode=offset_mode)

    # Next create the scan tree starting with the root node.
    ignore_list = []

    pattern_table = patterns.PatternTable(
        self.pattern_list, ignore_list, is_bound)

    if pattern_table.patterns:
      self.root_node = self._BuildScanTreeNode(
          pattern_table, ignore_list, is_bound)

      logging.debug(u'Scan tree:\n{0:s}'.format(
          self.root_node.ToDebugString()))

      # At the end the skip table is determined to provide for the
      # BoyerMooreHorspool skip value.
      self.skip_table = pattern_table.GetSkipTable()

      logging.debug(u'Skip table:\n{0:s}'.format(
          self.skip_table.ToDebugString()))

      self.largest_length = pattern_table.largest_pattern_length

  def _BuildPatterns(
      self, specification_store, is_bound,
      offset_mode=OFFSET_MODE_POSITIVE_STRICT):
    """Builds the list of patterns.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.
      offset_mode: optional value to indicate how the signature offsets should
                   be handled. The default is that the offset must be positive
                   and an error is raised for negative offsets.

    Raises:
      ValueError: if a signature offset is invalid according to the specified
                  offset mode or a signature pattern is too small to be
                  useful (< 4).
    """
    self.pattern_list = []

    for specification in specification_store.specifications:
      signature_index = 0

      for signature in specification.signatures:
        if signature.expression:
          signature_offset = signature.offset if is_bound else 0
          signature_pattern_length = len(signature.expression)

          # Make sure the signature offset is numeric; default to 0 otherwise.
          try:
            signature_offset = int(signature_offset)
          except (TypeError, ValueError):
            signature_offset = 0

          if signature_offset < 0:
            if offset_mode == self.OFFSET_MODE_POSITIVE:
              continue
            elif offset_mode == self.OFFSET_MODE_POSITIVE_STRICT:
              raise ValueError(u'Signature offset less than 0.')

            # The range list does not allow offsets to be negative and thus
            # the signature offset is turned into a positive equivalent.
            signature_offset *= -1

            # The signature size is subtracted to make sure the spanning
            # range will align with the original negative offset values.
            signature_offset -= signature_pattern_length

          elif signature_offset > 0:
            if offset_mode == self.OFFSET_MODE_NEGATIVE:
              continue
            elif offset_mode == self.OFFSET_MODE_NEGATIVE_STRICT:
              raise ValueError(u'Signature offset greater than 0.')

          if signature_pattern_length < 4:
            raise ValueError(u'Signature pattern smaller than 4.')

          pattern = patterns.Pattern(
              signature_index, signature, specification)
          self.pattern_list.append(pattern)
          self.range_list.Insert(signature_offset, signature_pattern_length)

        signature_index += 1

  def _BuildScanTreeNode(self, pattern_table, ignore_list, is_bound):
    """Builds a scan tree node.

    Args:
      pattern_table: a pattern table (instance of PatternTable).
      ignore_list: a list of pattern offsets to ignore.
      is_bound: boolean value to indicate if the signatures are bound
                to offsets. A value of None indicates that the value should
                be ignored and both bound and unbound patterns are considered
                unbound.

    Raises:
      ValueError: if number of byte value patterns value out of bounds.

    Returns:
      A scan tree node (instance of ScanTreeNode).
    """
    # Make a copy of the lists because the function is going to alter them
    # and the changes must remain in scope of the function.
    pattern_list = list(pattern_table.patterns)
    ignore_list = list(ignore_list)

    similarity_weights = _PatternWeights()
    occurrence_weights = _PatternWeights()
    value_weights = _PatternWeights()

    for pattern_offset in pattern_table.offsets:
      similarity_weights.AddOffset(pattern_offset)
      occurrence_weights.AddOffset(pattern_offset)
      value_weights.AddOffset(pattern_offset)

      byte_values = pattern_table.GetByteValues(pattern_offset)
      number_of_byte_values = len(byte_values)

      if number_of_byte_values > 1:
        occurrence_weights.SetWeight(pattern_offset, number_of_byte_values)

      for byte_value in byte_values:
        byte_value_patterns = byte_values[byte_value]
        byte_value_weight = len(byte_value_patterns.patterns)

        if byte_value_weight > 1:
          similarity_weights.AddWeight(pattern_offset, byte_value_weight)

        # Fix: compare the byte value itself against the common byte values.
        # The previous code compared the (integer) byte value weight against
        # the frozenset of (character) byte values, which was always true and
        # made every byte value count as uncommon.
        if byte_value not in self._COMMON_BYTE_VALUES:
          value_weights.AddWeight(pattern_offset, 1)

    logging.debug(u'Pattern table:\n{0:s}'.format(
        pattern_table.ToDebugString()))
    logging.debug(u'Similarity weights:\n{0:s}'.format(
        similarity_weights.ToDebugString()))
    logging.debug(u'Occurrence weights:\n{0:s}'.format(
        occurrence_weights.ToDebugString()))
    logging.debug(u'Value weights:\n{0:s}'.format(
        value_weights.ToDebugString()))

    pattern_offset = self._GetMostSignificantPatternOffset(
        pattern_list, similarity_weights, occurrence_weights, value_weights)

    ignore_list.append(pattern_offset)

    # For the scan tree negative offsets are adjusted so that
    # the smallest pattern offset is 0.
    scan_tree_pattern_offset = pattern_offset
    if scan_tree_pattern_offset < 0:
      scan_tree_pattern_offset -= pattern_table.smallest_pattern_offset

    scan_tree_node = ScanTreeNode(scan_tree_pattern_offset)

    byte_values = pattern_table.GetByteValues(pattern_offset)

    for byte_value in byte_values:
      byte_value_patterns = byte_values[byte_value]

      logging.debug(u'{0:s}'.format(byte_value_patterns.ToDebugString()))

      number_of_byte_value_patterns = len(byte_value_patterns.patterns)

      if number_of_byte_value_patterns <= 0:
        raise ValueError(
            u'Invalid number of byte value patterns value out of bounds.')

      elif number_of_byte_value_patterns == 1:
        # A single pattern becomes a leaf of the scan tree.
        for identifier in byte_value_patterns.patterns:
          logging.debug(
              u'Adding pattern: {0:s} for byte value: 0x{1:02x}.'.format(
                  identifier, ord(byte_value)))

        scan_tree_node.AddByteValue(
            byte_value, byte_value_patterns.patterns[identifier])

      else:
        # Multiple patterns require a scan sub tree; a separate name is used
        # here so the outer pattern_table is no longer shadowed.
        sub_pattern_table = patterns.PatternTable(
            byte_value_patterns.patterns.itervalues(), ignore_list, is_bound)

        scan_sub_node = self._BuildScanTreeNode(
            sub_pattern_table, ignore_list, is_bound)

        logging.debug(
            u'Adding scan node for byte value: 0x{0:02x}\n{1:s}'.format(
                ord(byte_value), scan_sub_node.ToDebugString()))

        scan_tree_node.AddByteValue(ord(byte_value), scan_sub_node)

      # The patterns covered by this byte value are handled and removed from
      # the remaining pattern list.
      for identifier in byte_value_patterns.patterns:
        logging.debug(u'Removing pattern: {0:s} from:\n{1:s}'.format(
            identifier, self._PatternsToDebugString(pattern_list)))

        pattern_list.remove(byte_value_patterns.patterns[identifier])

    logging.debug(u'Remaining patterns:\n{0:s}'.format(
        self._PatternsToDebugString(pattern_list)))

    number_of_patterns = len(pattern_list)

    if number_of_patterns == 1:
      logging.debug(u'Setting pattern: {0:s} for default value'.format(
          pattern_list[0].identifier))

      scan_tree_node.SetDefaultValue(pattern_list[0])

    elif number_of_patterns > 1:
      sub_pattern_table = patterns.PatternTable(
          pattern_list, ignore_list, is_bound)

      scan_sub_node = self._BuildScanTreeNode(
          sub_pattern_table, ignore_list, is_bound)

      logging.debug(u'Setting scan node for default value:\n{0:s}'.format(
          scan_sub_node.ToDebugString()))

      scan_tree_node.SetDefaultValue(scan_sub_node)

    return scan_tree_node

  def _GetMostSignificantPatternOffset(
      self, pattern_list, similarity_weights, occurrence_weights,
      value_weights):
    """Returns the most significant pattern offset.

    Args:
      pattern_list: a list of patterns.
      similarity_weights: the similarity (pattern) weights.
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Raises:
      ValueError: when the pattern list is an empty list.

    Returns:
      A pattern offset.
    """
    if not pattern_list:
      raise ValueError(u'Missing pattern list.')

    pattern_offset = None
    number_of_patterns = len(pattern_list)

    # The selection strategy is refined with the number of remaining patterns:
    # a single pattern only needs the value weights, two patterns the
    # occurrence weights and more patterns the similarity weights.
    if number_of_patterns == 1:
      pattern_offset = self._GetPatternOffsetForValueWeights(
          value_weights)

    elif number_of_patterns == 2:
      pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
          occurrence_weights, value_weights)

    elif number_of_patterns > 2:
      pattern_offset = self._GetPatternOffsetForSimilarityWeights(
          similarity_weights, occurrence_weights, value_weights)

    logging.debug(u'Largest weight offset: {0:d}'.format(pattern_offset))

    return pattern_offset

  def _GetPatternOffsetForOccurrenceWeights(
      self, occurrence_weights, value_weights):
    """Returns the most significant pattern offset per occurrence weights.

    Args:
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Returns:
      A pattern offset.
    """
    pattern_offset = None

    largest_weight = occurrence_weights.GetLargestWeight()
    logging.debug(u'Largest occurrence weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      occurrence_weight_offsets = occurrence_weights.GetOffsetsForWeight(
          largest_weight)
      number_of_occurrence_offsets = len(occurrence_weight_offsets)
    else:
      number_of_occurrence_offsets = 0

    if number_of_occurrence_offsets == 0:
      # Fall back to the value weights when there are no occurrence weights.
      pattern_offset = self._GetPatternOffsetForValueWeights(
          value_weights)

    elif number_of_occurrence_offsets == 1:
      pattern_offset = occurrence_weight_offsets[0]

    else:
      # Use the value weights as a tie breaker when there are multiple
      # occurrence weight offsets.
      largest_weight = 0

      for occurrence_offset in occurrence_weight_offsets:
        value_weight = value_weights.GetWeightForOffset(
            occurrence_offset)

        debug_string = (
            u'Occurrence offset: {0:d} value weight: {1:d}').format(
                occurrence_offset, value_weight)

        # Fix: the pattern offset is explicitly compared against None since
        # a pattern offset of 0 is valid but not truthy.
        if pattern_offset is None or largest_weight < value_weight:
          largest_weight = value_weight
          pattern_offset = occurrence_offset

          # Fix: report the weight that was actually selected; the previous
          # code printed a variable that was never updated.
          debug_string += u' largest value weight: {0:d}'.format(
              largest_weight)

        logging.debug(u'{0:s}'.format(debug_string))

    return pattern_offset

  def _GetPatternOffsetForSimilarityWeights(
      self, similarity_weights, occurrence_weights, value_weights):
    """Returns the most significant pattern offset per similarity weights.

    Args:
      similarity_weights: the similarity (pattern) weights.
      occurrence_weights: the occurrence (pattern) weights.
      value_weights: the value (pattern) weights.

    Returns:
      A pattern offset.
    """
    pattern_offset = None

    largest_weight = similarity_weights.GetLargestWeight()
    logging.debug(u'Largest similarity weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      similarity_weight_offsets = similarity_weights.GetOffsetsForWeight(
          largest_weight)
      number_of_similarity_offsets = len(similarity_weight_offsets)
    else:
      number_of_similarity_offsets = 0

    if number_of_similarity_offsets == 0:
      # Fall back to the occurrence weights when there are no similarity
      # weights.
      pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
          occurrence_weights, value_weights)

    elif number_of_similarity_offsets == 1:
      pattern_offset = similarity_weight_offsets[0]

    else:
      # Use the occurrence weights, and subsequently the value weights, as
      # tie breakers when there are multiple similarity weight offsets.
      largest_weight = 0
      largest_value_weight = 0

      for similarity_offset in similarity_weight_offsets:
        occurrence_weight = occurrence_weights.GetWeightForOffset(
            similarity_offset)

        debug_string = (
            u'Similarity offset: {0:d} occurrence weight: {1:d}').format(
                similarity_offset, occurrence_weight)

        if largest_weight > 0 and largest_weight == occurrence_weight:
          value_weight = value_weights.GetWeightForOffset(
              similarity_offset)

          debug_string += u' value weight: {0:d}'.format(value_weight)

          # Clearing the largest weight allows the comparison below to
          # override the current pattern offset.
          if largest_value_weight < value_weight:
            largest_weight = 0

        # Fix: the pattern offset is explicitly compared against None since
        # a pattern offset of 0 is valid but not truthy.
        if pattern_offset is None or largest_weight < occurrence_weight:
          largest_weight = occurrence_weight
          pattern_offset = similarity_offset

          largest_value_weight = value_weights.GetWeightForOffset(
              similarity_offset)

          debug_string += u' largest value weight: {0:d}'.format(
              largest_value_weight)

        logging.debug(u'{0:s}'.format(debug_string))

    return pattern_offset

  def _GetPatternOffsetForValueWeights(self, value_weights):
    """Returns the most significant pattern offset based on the value weights.

    Args:
      value_weights: the value (pattern) weights.

    Raises:
      RuntimeError: if no value weight offsets were found.

    Returns:
      A pattern offset.
    """
    largest_weight = value_weights.GetLargestWeight()
    logging.debug(u'Largest value weight: {0:d}'.format(largest_weight))

    if largest_weight > 0:
      value_weight_offsets = value_weights.GetOffsetsForWeight(largest_weight)
      number_of_value_offsets = len(value_weight_offsets)
    else:
      number_of_value_offsets = 0

    if number_of_value_offsets == 0:
      raise RuntimeError(u'No value weight offsets found.')

    return value_weight_offsets[0]

  def _PatternsToDebugString(self, pattern_list):
    """Converts the list of patterns into a debug string."""
    entries = u', '.join([u'{0:s}'.format(pattern) for pattern in pattern_list])
    return u''.join([u'[', entries, u']'])
class ScanTreeNode(object):
  """Class that implements a scan tree node.

  A node maps byte values, at the node's pattern offset, onto either a scan
  sub node or a pattern (leaf). A default value provides the non-match
  continuation.
  """

  def __init__(self, pattern_offset):
    """Initializes the scan tree node.

    Args:
      pattern_offset: the offset in the pattern to which the node applies.
    """
    super(ScanTreeNode, self).__init__()
    # Maps an integer byte value onto a scan sub node or a pattern.
    self._byte_values = {}
    self.default_value = None
    self.parent = None
    self.pattern_offset = pattern_offset

  def AddByteValue(self, byte_value, scan_object):
    """Adds a byte value.

    Args:
      byte_value: the corresponding byte value, either an integer or a
                  single character string.
      scan_object: the scan object, either a scan sub node or a pattern.

    Raises:
      ValueError: if byte value is out of bounds or if the node already
                  contains a scan object for the byte value.
    """
    # Byte values are normalized to integers so both representations refer
    # to the same dictionary key.
    if isinstance(byte_value, str):
      byte_value = ord(byte_value)

    if byte_value < 0 or byte_value > 255:
      raise ValueError(u'Invalid byte value, value out of bounds.')

    if byte_value in self._byte_values:
      raise ValueError(u'Byte value already set.')

    if isinstance(scan_object, ScanTreeNode):
      scan_object.parent = self

    self._byte_values[byte_value] = scan_object

  def CompareByteValue(
      self, data, data_offset, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the bounded scan tree.

    This function will return partial matches on the data block boundary
    as long as the total data size has not been reached.

    Args:
      data: a buffer containing raw data.
      data_offset: the offset in the raw data in the buffer.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Returns:
      The resulting scan object which is either a ScanTreeNode or Pattern
      or None.

    Raises:
      RuntimeError: if the data offset, total data offset, total data size
                    or pattern offset value is out of bounds.
    """
    if data_offset < 0 or data_offset >= data_size:
      raise RuntimeError(u'Invalid data offset, value out of bounds.')

    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size, value out of bounds.')

    if total_data_offset < 0 or (
        total_data_size is not None and total_data_offset >= total_data_size):
      raise RuntimeError(u'Invalid total data offset, value out of bounds.')

    # A partial match on the data block boundary is allowed when the total
    # data size is known and this buffer contains the end of the total data.
    if (total_data_size is not None and
        total_data_offset + data_size >= total_data_size):
      match_on_boundary = True
    else:
      match_on_boundary = False

    data_offset += self.pattern_offset

    if not match_on_boundary and data_offset >= data_size:
      raise RuntimeError(u'Invalid pattern offset value, out of bounds.')

    scan_object = None
    if data_offset < data_size:
      # Use a dictionary lookup instead of iterating over all the byte
      # values; this is equivalent but O(1) instead of O(n).
      data_byte_value = ord(data[data_offset])
      scan_object = self._byte_values.get(data_byte_value, None)

      if scan_object is not None:
        logging.debug(
            u'Scan tree node match at data offset: 0x{0:08x}.'.format(
                data_offset))

    if scan_object is None:
      scan_object = self.default_value
      if not scan_object:
        # Fall back to the closest ancestor node that has a default value.
        scan_object = self.parent
        while scan_object and not scan_object.default_value:
          scan_object = scan_object.parent

        if scan_object:
          scan_object = scan_object.default_value

    return scan_object

  def SetDefaultValue(self, scan_object):
    """Sets the default (non-match) value.

    Args:
      scan_object: the scan object, either a scan sub node or a pattern.

    Raises:
      ValueError: if the default value is already set.
    """
    if self.default_value:
      raise ValueError(u'Default value already set.')

    self.default_value = scan_object

  def ToDebugString(self, indentation_level=1):
    """Converts the scan tree node into a debug string."""
    indentation = u' ' * indentation_level

    header = u'{0:s}pattern offset: {1:d}\n'.format(
        indentation, self.pattern_offset)

    entries = u''
    for byte_value in self._byte_values:
      entries += u'{0:s}byte value: 0x{1:02x}\n'.format(indentation, byte_value)

      if isinstance(self._byte_values[byte_value], ScanTreeNode):
        entries += u'{0:s}scan tree node:\n'.format(indentation)
        entries += self._byte_values[byte_value].ToDebugString(
            indentation_level + 1)

      elif isinstance(self._byte_values[byte_value], patterns.Pattern):
        entries += u'{0:s}pattern: {1:s}\n'.format(
            indentation, self._byte_values[byte_value].identifier)

    default = u'{0:s}default value:\n'.format(indentation)

    if isinstance(self.default_value, ScanTreeNode):
      default += u'{0:s}scan tree node:\n'.format(indentation)
      default += self.default_value.ToDebugString(indentation_level + 1)

    elif isinstance(self.default_value, patterns.Pattern):
      default += u'{0:s}pattern: {1:s}\n'.format(
          indentation, self.default_value.identifier)

    return u''.join([header, entries, default, u'\n'])
+74
View File
@@ -0,0 +1,74 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the scan tree classes."""
import unittest
from plaso.classifier import patterns
from plaso.classifier import scan_tree
from plaso.classifier import specification
class ScanTreeNodeTest(unittest.TestCase):
  """Tests for the scan tree node."""

  def testAddByteValueWithPattern(self):
    """Tests adding byte values that map onto patterns."""
    node = scan_tree.ScanTreeNode(0)

    regf_specification = specification.Specification('REGF')
    regf_specification.AddNewSignature('regf', offset=0)
    regf_signature = specification.Signature('regf', offset=0)
    regf_pattern = patterns.Pattern(0, regf_signature, regf_specification)

    esedb_specification = specification.Specification('ESEDB')
    esedb_specification.AddNewSignature('\xef\xcd\xab\x89', offset=4)
    esedb_signature = specification.Signature('\xef\xcd\xab\x89', offset=4)
    esedb_pattern = patterns.Pattern(0, esedb_signature, esedb_specification)

    node.AddByteValue('r', regf_pattern)
    node.AddByteValue('\xef', esedb_pattern)

    # A byte value can only be set once.
    self.assertRaises(ValueError, node.AddByteValue, 'r', regf_pattern)

    # Byte values must be within the range of a single byte.
    self.assertRaises(ValueError, node.AddByteValue, -1, regf_pattern)
    self.assertRaises(ValueError, node.AddByteValue, 256, regf_pattern)

  def testAddByteValueWithScanNode(self):
    """Tests adding byte values that map onto scan sub nodes."""
    node = scan_tree.ScanTreeNode(0)
    sub_node_0x41 = scan_tree.ScanTreeNode(1)
    sub_node_0x80 = scan_tree.ScanTreeNode(1)

    node.AddByteValue(0x41, sub_node_0x41)
    node.AddByteValue(0x80, sub_node_0x80)

    # A byte value can only be set once.
    self.assertRaises(ValueError, node.AddByteValue, 0x80, sub_node_0x80)

    # Byte values must be within the range of a single byte.
    self.assertRaises(ValueError, node.AddByteValue, -1, sub_node_0x80)
    self.assertRaises(ValueError, node.AddByteValue, 256, sub_node_0x80)
# Run the tests when this module is invoked directly as a script.
if __name__ == '__main__':
  unittest.main()
+749
View File
@@ -0,0 +1,749 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the classes for a scan tree-based format scanner."""
import logging
import os
from plaso.classifier import patterns
from plaso.classifier import range_list
from plaso.classifier import scan_tree
class _ScanMatch(object):
  """Class that implements a scan match."""

  def __init__(self, total_data_offset, pattern):
    """Initializes the scan match.

    Args:
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.
      pattern: the pattern matched.
    """
    super(_ScanMatch, self).__init__()
    self.pattern = pattern
    self.total_data_offset = total_data_offset

  @property
  def specification(self):
    """The format specification of the matched pattern."""
    return self.pattern.specification
class _ScanResult(object):
  """Class that implements a scan result."""

  def __init__(self, specification):
    """Initializes the scan result.

    Args:
      specification: the format specification the scan matches apply to.
    """
    super(_ScanResult, self).__init__()
    self.scan_matches = []
    self.specification = specification

  @property
  def identifier(self):
    """The specification identifier."""
    return self.specification.identifier
class ScanState(object):
  """Class that implements a scan state.

  The state progresses from start, via scanning, to stop. Matches can only
  be added before the state is stopped and retrieved afterwards.
  """

  # The state definitions.
  _SCAN_STATE_START = 1
  _SCAN_STATE_SCANNING = 2
  _SCAN_STATE_STOP = 3

  def __init__(self, scan_tree_node, total_data_size=None):
    """Initializes the scan state.

    Args:
      scan_tree_node: the corresponding scan tree node or None.
      total_data_size: optional value to indicate the total data size.
                       The default is None.
    """
    super(ScanState, self).__init__()
    self._matches = []
    self.remaining_data = None
    self.remaining_data_size = 0
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_START
    self.total_data_offset = 0
    self.total_data_size = total_data_size

  def _RaiseIfNotActive(self):
    """Raises RuntimeError if the state is neither start nor scanning."""
    if self.state not in (self._SCAN_STATE_START, self._SCAN_STATE_SCANNING):
      raise RuntimeError(u'Unsupported scan state.')

  def _RaiseIfNotStopped(self):
    """Raises RuntimeError if the state is not stop."""
    if self.state != self._SCAN_STATE_STOP:
      raise RuntimeError(u'Unsupported scan state.')

  def AddMatch(self, total_data_offset, pattern):
    """Adds a match to an active scan state.

    Args:
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.
      pattern: the pattern matched.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self._matches.append(_ScanMatch(total_data_offset, pattern))

  def GetMatches(self):
    """Retrieves a list containing the results.

    Returns:
      A list of scan matches (instances of _ScanMatch).

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotStopped()
    return self._matches

  def Reset(self, scan_tree_node):
    """Resets the state to start.

    This function will clear the remaining data.

    Args:
      scan_tree_node: the corresponding scan tree node or None.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotStopped()
    self.remaining_data = None
    self.remaining_data_size = 0
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_START

  def Scanning(self, scan_tree_node, total_data_offset):
    """Sets the state to scanning.

    Args:
      scan_tree_node: the active scan tree node.
      total_data_offset: the offset of the resulting match relative
                         to the start of the total data scanned.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self.scan_tree_node = scan_tree_node
    self.state = self._SCAN_STATE_SCANNING
    self.total_data_offset = total_data_offset

  def Stop(self):
    """Sets the state to stop.

    Raises:
      RuntimeError: when an unsupported state is encountered.
    """
    self._RaiseIfNotActive()
    self.scan_tree_node = None
    self.state = self._SCAN_STATE_STOP
class ScanTreeScannerBase(object):
  """Class that implements a scan tree-based scanner base."""

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(ScanTreeScannerBase, self).__init__()
    # The scan tree is created lazily by subclasses, e.g. on the first scan.
    self._scan_tree = None
    self._specification_store = specification_store

  def _ScanBufferScanState(
      self, scan_tree_object, scan_state, data, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the scan tree.

    This function implements a BoyerMooreHorspool equivalent approach
    in combination with the scan tree.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Raises:
      RuntimeError: if the total data offset, total data size or the last
                    pattern offset value is out of bounds.
    """
    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size, value out of bounds.')

    if total_data_offset < 0 or (
        total_data_size is not None and total_data_offset >= total_data_size):
      raise RuntimeError(u'Invalid total data offset, value out of bounds.')

    data_offset = 0
    scan_tree_node = scan_state.scan_tree_node

    if scan_state.remaining_data:
      # Prepend the remaining data of the previous pass so patterns spanning
      # a buffer boundary can still be matched.
      # str.join() should be more efficient then concatenation by +.
      data = ''.join([scan_state.remaining_data, data])
      data_size += scan_state.remaining_data_size

      scan_state.remaining_data = None
      scan_state.remaining_data_size = 0

    # Partial matches at the end of the buffer are only allowed when this
    # buffer contains the end of the total data.
    if (total_data_size is not None and
        total_data_offset + data_size >= total_data_size):
      match_on_boundary = True
    else:
      match_on_boundary = False

    while data_offset < data_size:
      # Stop early when the remaining data is smaller than the largest
      # pattern; it is preserved as remaining data for the next pass.
      if (not match_on_boundary and
          data_offset + scan_tree_object.largest_length >= data_size):
        break

      found_match = False
      scan_done = False

      # Walk the scan tree until a leaf, a pattern or None, is reached.
      while not scan_done:
        scan_object = scan_tree_node.CompareByteValue(
            data, data_offset, data_size, total_data_offset,
            total_data_size=total_data_size)

        if isinstance(scan_object, scan_tree.ScanTreeNode):
          scan_tree_node = scan_object
        else:
          scan_done = True

      if isinstance(scan_object, patterns.Pattern):
        pattern_length = len(scan_object.signature.expression)
        data_last_offset = data_offset + pattern_length

        # NOTE(review): cmp() is Python 2 only; the comparison is equivalent
        # to an equality (==) check of the two strings.
        if cmp(scan_object.signature.expression,
               data[data_offset:data_last_offset]) == 0:

          # A bound signature only matches at its specified offset.
          if (not scan_object.signature.is_bound or
              scan_object.signature.offset == data_offset):
            found_match = True

            logging.debug(
                u'Signature match at data offset: 0x{0:08x}.'.format(
                    data_offset))

            scan_state.AddMatch(total_data_offset + data_offset, scan_object)

      if found_match:
        # Skip the entire matched pattern and restart at the root node.
        skip_value = len(scan_object.signature.expression)
        scan_tree_node = scan_tree_object.root_node
      else:
        # Determine the Boyer-Moore-Horspool skip value from the last byte
        # values within the skip pattern length.
        last_pattern_offset = (
            scan_tree_object.skip_table.skip_pattern_length - 1)

        if data_offset + last_pattern_offset >= data_size:
          raise RuntimeError(
              u'Invalid last pattern offset, value out of bounds.')
        skip_value = 0

        while last_pattern_offset >= 0 and not skip_value:
          last_data_offset = data_offset + last_pattern_offset
          byte_value = ord(data[last_data_offset])
          skip_value = scan_tree_object.skip_table[byte_value]
          last_pattern_offset -= 1

        if not skip_value:
          # No skip value was defined; advance a single byte and restart
          # at the root node.
          skip_value = 1
          scan_tree_node = scan_tree_object.root_node

      data_offset += skip_value

    if not match_on_boundary and data_offset < data_size:
      # Preserve the unscanned tail of the buffer for the next pass.
      scan_state.remaining_data = data[data_offset:data_size]
      scan_state.remaining_data_size = data_size - data_offset

    scan_state.Scanning(scan_tree_node, total_data_offset + data_offset)

  def _ScanBufferScanStateFinal(self, scan_tree_object, scan_state):
    """Scans the remaining data in the scan state using the scan tree.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
    """
    if scan_state.remaining_data:
      data = scan_state.remaining_data
      data_size = scan_state.remaining_data_size

      scan_state.remaining_data = None
      scan_state.remaining_data_size = 0

      # Setting the total data size makes sure boundary matches are returned
      # in this scanning pass.
      total_data_size = scan_state.total_data_size
      if total_data_size is None:
        total_data_size = scan_state.total_data_offset + data_size

      self._ScanBufferScanState(
          scan_tree_object, scan_state, data, data_size,
          scan_state.total_data_offset, total_data_size=total_data_size)

    scan_state.Stop()

  def GetScanResults(self, scan_state):
    """Retrieves the scan results.

    Args:
      scan_state: the scan state (instance of ScanState).

    Returns:
      A list of scan results (instances of _ScanResult), one per matched
      specification.
    """
    scan_results = {}

    # Group the scan matches by specification identifier.
    for scan_match in scan_state.GetMatches():
      specification = scan_match.specification
      identifier = specification.identifier

      logging.debug(
          u'Scan match at offset: 0x{0:08x} specification: {1:s}'.format(
              scan_match.total_data_offset, identifier))

      if identifier not in scan_results:
        scan_results[identifier] = _ScanResult(specification)

      scan_results[identifier].scan_matches.append(scan_match)

    return scan_results.values()
class Scanner(ScanTreeScannerBase):
  """Class that implements a scan tree-based scanner."""

  _READ_BUFFER_SIZE = 512

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(Scanner, self).__init__(specification_store)

  def ScanBuffer(self, scan_state, data, data_size):
    """Scans a buffer.

    Args:
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
    """
    self._ScanBufferScanState(
        self._scan_tree, scan_state, data, data_size,
        scan_state.total_data_offset,
        total_data_size=scan_state.total_data_size)

  def ScanFileObject(self, file_object):
    """Scans a file-like object.

    Args:
      file_object: a file-like object.

    Returns:
      A list of scan results (instances of _ScanResult).
    """
    get_size = getattr(file_object, 'get_size', None)
    if get_size is not None:
      file_size = get_size()
    else:
      # Determine the size by seeking to the end of the file-like object.
      file_object.seek(0, os.SEEK_END)
      file_size = file_object.tell()

    scan_state = self.StartScan(total_data_size=file_size)

    file_offset = 0
    file_object.seek(file_offset, os.SEEK_SET)

    while file_offset < file_size:
      data = file_object.read(self._READ_BUFFER_SIZE)
      if not data:
        break

      data_size = len(data)
      self._ScanBufferScanState(
          self._scan_tree, scan_state, data, data_size, file_offset,
          total_data_size=file_size)

      file_offset += data_size

    self.StopScan(scan_state)

    return self.GetScanResults(scan_state)

  def StartScan(self, total_data_size=None):
    """Starts a scan.

    The function sets up the scanning related structures if necessary.

    Args:
      total_data_size: optional value to indicate the total data size.
                       The default is None.

    Returns:
      A scan state (instance of ScanState).

    Raises:
      RuntimeError: when total data size is invalid.
    """
    if total_data_size is not None and total_data_size < 0:
      raise RuntimeError(u'Invalid total data size.')

    # The scan tree is built once and reused for subsequent scans.
    if self._scan_tree is None:
      self._scan_tree = scan_tree.ScanTree(
          self._specification_store, None)

    return ScanState(self._scan_tree.root_node, total_data_size=total_data_size)

  def StopScan(self, scan_state):
    """Stops a scan.

    Args:
      scan_state: the scan state (instance of ScanState).
    """
    self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
class OffsetBoundScanner(ScanTreeScannerBase):
  """Class that implements an offset-bound scan tree-based scanner.

  Only the read-buffer-aligned ranges that can contain offset-bound header
  and footer signatures are read and scanned, instead of the entire data.
  """

  _READ_BUFFER_SIZE = 512

  def __init__(self, specification_store):
    """Initializes the scanner.

    Args:
      specification_store: the specification store (instance of
                           SpecificationStore) that contains the format
                           specifications.
    """
    super(OffsetBoundScanner, self).__init__(specification_store)
    # Separate scan trees for signatures bound relative to the start
    # (header) and the end (footer) of the data; built lazily by StartScan.
    self._footer_scan_tree = None
    self._footer_spanning_range = None
    self._header_scan_tree = None
    self._header_spanning_range = None

  def _GetFooterRange(self, total_data_size):
    """Retrieves the read buffer aligned footer range.

    Args:
      total_data_size: the total data size. Required to translate the
                       end-relative footer offsets into absolute offsets.

    Returns:
      A range (instance of Range).
    """
    # The actual footer range is in reverse since the spanning footer range
    # is based on positive offsets, where 0 is the end of file.
    if self._footer_spanning_range.end_offset < total_data_size:
      footer_range_start_offset = (
          total_data_size - self._footer_spanning_range.end_offset)
    else:
      footer_range_start_offset = 0

    # Calculate the lower bound modulus of the footer range start offset
    # in increments of the read buffer size. Floor division (//) keeps the
    # result an integer on Python 3 as well; it is identical to / for
    # non-negative integers on Python 2.
    footer_range_start_offset //= self._READ_BUFFER_SIZE
    footer_range_start_offset *= self._READ_BUFFER_SIZE

    # Calculate the upper bound modulus of the footer range size
    # in increments of the read buffer size.
    footer_range_size = self._footer_spanning_range.size
    remainder = footer_range_size % self._READ_BUFFER_SIZE
    footer_range_size //= self._READ_BUFFER_SIZE
    if remainder > 0:
      footer_range_size += 1
    footer_range_size *= self._READ_BUFFER_SIZE

    return range_list.Range(footer_range_start_offset, footer_range_size)

  def _GetHeaderRange(self):
    """Retrieves the read buffer aligned header range.

    Returns:
      A range (instance of Range).
    """
    # Calculate the lower bound modulus of the header range start offset
    # in increments of the read buffer size.
    header_range_start_offset = self._header_spanning_range.start_offset
    header_range_start_offset //= self._READ_BUFFER_SIZE
    header_range_start_offset *= self._READ_BUFFER_SIZE

    # Calculate the upper bound modulus of the header range size
    # in increments of the read buffer size.
    header_range_size = self._header_spanning_range.size
    remainder = header_range_size % self._READ_BUFFER_SIZE
    header_range_size //= self._READ_BUFFER_SIZE
    if remainder > 0:
      header_range_size += 1
    header_range_size *= self._READ_BUFFER_SIZE

    return range_list.Range(header_range_start_offset, header_range_size)

  def _ScanBufferScanState(
      self, scan_tree_object, scan_state, data, data_size, total_data_offset,
      total_data_size=None):
    """Scans a buffer using the scan tree.

    This function implements a BoyerMooreHorspool equivalent approach
    in combination with the scan tree.

    Args:
      scan_tree_object: the scan tree (instance of ScanTree).
      scan_state: the scan state (instance of ScanState).
      data: a buffer containing raw data.
      data_size: the size of the raw data in the buffer.
      total_data_offset: the offset of the data relative to the start of
                         the total data scanned.
      total_data_size: optional value to indicate the total data size.
                       The default is None.
    """
    scan_done = False
    scan_tree_node = scan_tree_object.root_node

    while not scan_done:
      data_offset = 0
      scan_object = scan_tree_node.CompareByteValue(
          data, data_offset, data_size, total_data_offset,
          total_data_size=total_data_size)

      if isinstance(scan_object, scan_tree.ScanTreeNode):
        # Descend into the scan tree.
        scan_tree_node = scan_object
      else:
        scan_done = True

      if isinstance(scan_object, patterns.Pattern):
        pattern_length = len(scan_object.signature.expression)
        pattern_start_offset = scan_object.signature.offset
        pattern_end_offset = pattern_start_offset + pattern_length

        # Direct equality replaces the Python 2-only cmp() == 0 that was
        # used here; the comparison semantics are identical.
        if (scan_object.signature.expression ==
            data[pattern_start_offset:pattern_end_offset]):
          scan_state.AddMatch(
              total_data_offset + scan_object.signature.offset, scan_object)
          logging.debug(
              u'Signature match at data offset: 0x{0:08x}.'.format(data_offset))

  # TODO: implement.
  # def ScanBuffer(self, scan_state, data, data_size):
  #   """Scans a buffer.
  #
  #   Args:
  #     scan_state: the scan state (instance of ScanState).
  #     data: a buffer containing raw data.
  #     data_size: the size of the raw data in the buffer.
  #   """
  #   # TODO: fix footer scanning logic.
  #   # need to know the file size here for the footers.
  #
  #   # TODO: check for clashing ranges?
  #   header_range = self._GetHeaderRange()
  #   footer_range = self._GetFooterRange(scan_state.total_data_size)
  #
  #   if self._scan_tree == self._header_scan_tree:
  #     if (scan_state.total_data_offset >= header_range.start_offset and
  #         scan_state.total_data_offset < header_range.end_offset):
  #       self._ScanBufferScanState(
  #           self._scan_tree, scan_state, data, data_size,
  #           scan_state.total_data_offset,
  #           total_data_size=scan_state.total_data_size)
  #     elif scan_state.total_data_offset > header_range.end_offset:
  #       # TODO: implement.
  #       pass
  #
  #   if self._scan_tree == self._footer_scan_tree:
  #     if (scan_state.total_data_offset >= footer_range.start_offset and
  #         scan_state.total_data_offset < footer_range.end_offset):
  #       self._ScanBufferScanState(
  #           self._scan_tree, scan_state, data, data_size,
  #           scan_state.total_data_offset,
  #           total_data_size=scan_state.total_data_size)

  def ScanFileObject(self, file_object):
    """Scans a file-like object.

    Args:
      file_object: a file-like object.

    Returns:
      A list of scan results (instances of ScanResult).
    """
    # Note: the original docstring claimed a ScanState was returned; the
    # method returns the scan results like Scanner.ScanFileObject.

    # TODO: add support for fixed size block-based reads.
    if hasattr(file_object, 'get_size'):
      file_size = file_object.get_size()
    else:
      file_object.seek(0, os.SEEK_END)
      file_size = file_object.tell()

    file_offset = 0
    scan_state = self.StartScan(total_data_size=file_size)

    if self._header_scan_tree.root_node is not None:
      header_range = self._GetHeaderRange()

      # TODO: optimize the read by supporting fixed size block-based reads.
      # if file_offset < header_range.start_offset:
      #   file_offset = header_range.start_offset

      file_object.seek(file_offset, os.SEEK_SET)

      # TODO: optimize the read by supporting fixed size block-based reads.
      # data = file_object.read(header_range.size)
      data = file_object.read(header_range.end_offset)
      data_size = len(data)

      if data_size > 0:
        self._ScanBufferScanState(
            self._scan_tree, scan_state, data, data_size, file_offset,
            total_data_size=file_size)

      file_offset += data_size

      # Switch from the header to the footer scan tree, if there is one.
      if self._footer_scan_tree.root_node is not None:
        self.StopScan(scan_state)

        self._scan_tree = self._footer_scan_tree
        scan_state.Reset(self._scan_tree.root_node)

    if self._footer_scan_tree.root_node is not None:
      footer_range = self._GetFooterRange(file_size)

      # Note that the offset in the footer scan tree start with 0. Make sure
      # the data offset of the data being scanned is aligned with the offset
      # in the scan tree.
      if footer_range.start_offset < self._footer_spanning_range.end_offset:
        data_offset = (
            self._footer_spanning_range.end_offset - footer_range.start_offset)
      else:
        data_offset = 0

      if file_offset < footer_range.start_offset:
        file_offset = footer_range.start_offset

      file_object.seek(file_offset, os.SEEK_SET)

      data = file_object.read(self._READ_BUFFER_SIZE)
      data_size = len(data)

      if data_size > 0:
        self._ScanBufferScanState(
            self._scan_tree, scan_state, data[data_offset:],
            data_size - data_offset, file_offset + data_offset,
            total_data_size=file_size)

    self.StopScan(scan_state)

    return self.GetScanResults(scan_state)

  def StartScan(self, total_data_size=None):
    """Starts a scan.

    The function sets up the scanning related structures if necessary.

    Args:
      total_data_size: the total data size. Although the parameter defaults
                       to None for interface compatibility with Scanner, a
                       valid size is required here because the footer ranges
                       are relative to the end of the data.

    Returns:
      A scan state (instance of ScanState).

    Raises:
      RuntimeError: when total data size is invalid (None or negative).
    """
    # Note: the original docstring claimed a list of scan results was
    # returned; the method returns a ScanState like Scanner.StartScan.
    if total_data_size is None or total_data_size < 0:
      raise RuntimeError(u'Invalid total data size.')

    if self._header_scan_tree is None:
      self._header_scan_tree = scan_tree.ScanTree(
          self._specification_store, True,
          offset_mode=scan_tree.ScanTree.OFFSET_MODE_POSITIVE)

    if self._header_spanning_range is None:
      spanning_range = self._header_scan_tree.range_list.GetSpanningRange()
      self._header_spanning_range = spanning_range

    if self._footer_scan_tree is None:
      self._footer_scan_tree = scan_tree.ScanTree(
          self._specification_store, True,
          offset_mode=scan_tree.ScanTree.OFFSET_MODE_NEGATIVE)

    if self._footer_spanning_range is None:
      spanning_range = self._footer_scan_tree.range_list.GetSpanningRange()
      self._footer_spanning_range = spanning_range

    # Prefer the header scan tree; fall back to the footer scan tree when
    # there are no header-bound signatures.
    if self._header_scan_tree.root_node is not None:
      self._scan_tree = self._header_scan_tree
    elif self._footer_scan_tree.root_node is not None:
      self._scan_tree = self._footer_scan_tree
    else:
      self._scan_tree = None

    if self._scan_tree is not None:
      root_node = self._scan_tree.root_node
    else:
      root_node = None

    return ScanState(root_node, total_data_size=total_data_size)

  def StopScan(self, scan_state):
    """Stops a scan.

    Args:
      scan_state: the scan state (instance of ScanState).
    """
    self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
    # Clear the active scan tree so ScanFileObject can switch between the
    # header and footer scan trees on the next (re)start.
    self._scan_tree = None
+119
View File
@@ -0,0 +1,119 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the format scanner classes."""
import unittest
from plaso.classifier import scanner
from plaso.classifier import test_lib
class ScannerTest(unittest.TestCase):
  """Class to test the scanner."""

  def _ScanAndCountMatches(self, test_scanner, buffers, total_data_size):
    """Runs a full scan over the buffers and returns the match count.

    Args:
      test_scanner: the scanner (instance of Scanner).
      buffers: a list of data buffers to scan in order.
      total_data_size: the total data size or None when unknown.

    Returns:
      The number of scan matches.
    """
    scan_state = test_scanner.StartScan(total_data_size=total_data_size)
    for data in buffers:
      test_scanner.ScanBuffer(scan_state, data, len(data))
    test_scanner.StopScan(scan_state)
    return len(scan_state.GetMatches())

  def testInitialize(self):
    """Tests scanning various buffers with and without a known total size."""
    store = test_lib.CreateSpecificationStore()

    # Signature for LNK
    data1 = ('\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
             '\x00\x00\x00\x46')

    # Signature for REGF
    data2 = 'regf'

    # Random data
    data3 = '\x01\xfa\xe0\xbe\x99\x8e\xdb\x70\xea\xcc\x6b\xae\x2f\xf5\xa2\xe4'

    # Boundary scan test: the ZIP signature straddles the two buffers.
    data4a = ('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
              '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00PK')
    data4b = ('\x07\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
              '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Z')

    # Large buffer test
    data5_size = 1024 * 1024
    data5 = '\x00' * (data5_size - 4)
    data5 += 'PK\x07\x08'

    test_scanner = scanner.Scanner(store)

    # Each input is scanned twice: once with the total data size provided
    # and once with it unknown (None); the match count must be the same.
    self.assertEqual(
        self._ScanAndCountMatches(test_scanner, [data1], len(data1)), 1)
    self.assertEqual(
        self._ScanAndCountMatches(test_scanner, [data1], None), 1)

    self.assertEqual(
        self._ScanAndCountMatches(test_scanner, [data2], len(data2)), 1)
    self.assertEqual(
        self._ScanAndCountMatches(test_scanner, [data2], None), 1)

    self.assertEqual(
        self._ScanAndCountMatches(test_scanner, [data3], len(data3)), 0)
    self.assertEqual(
        self._ScanAndCountMatches(test_scanner, [data3], None), 0)

    total_data_size = len(data4a) + len(data4b)
    self.assertEqual(
        self._ScanAndCountMatches(
            test_scanner, [data4a, data4b], total_data_size), 1)
    self.assertEqual(
        self._ScanAndCountMatches(test_scanner, [data4a, data4b], None), 1)

    self.assertEqual(
        self._ScanAndCountMatches(test_scanner, [data5], len(data5)), 1)
# Allow running the tests directly as a script.
if __name__ == '__main__':
  unittest.main()
+156
View File
@@ -0,0 +1,156 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The format specification classes."""
class Signature(object):
  """Class that defines a signature of a format specification.

  A signature consists of a byte string expression, an optional offset
  relative to the start of the data, and a flag indicating whether the
  expression must occur exactly at that offset.
  """

  def __init__(self, expression, offset=None, is_bound=False):
    """Initializes the signature.

    Args:
      expression: string containing the expression of the signature.
                  The expression consists of a byte string; at the moment
                  regular expressions (regexp) are not supported.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative from the start of the data, a negative offset
              is relative from the end of the data.
      is_bound: boolean value to indicate the signature must be bound to
                the offset, False by default.
    """
    # Whether the signature must occur exactly at the offset.
    self.is_bound = is_bound
    # Signature location; None means anywhere in the data.
    self.offset = offset
    # The byte string pattern identifying the format.
    self.expression = expression
class Specification(object):
  """Class that contains a format specification."""

  def __init__(self, identifier):
    """Initializes the specification.

    Args:
      identifier: string containing a unique name for the format.
    """
    # Unique name of the format.
    self.identifier = identifier
    # Classification metadata and signatures of the format.
    self.mime_types = []
    self.signatures = []
    self.universal_type_identifiers = []

  def AddMimeType(self, mime_type):
    """Adds a MIME type to the specification."""
    self.mime_types.append(mime_type)

  def AddNewSignature(self, expression, offset=None, is_bound=False):
    """Creates a new signature and adds it to the specification.

    Args:
      expression: string containing the expression of the signature.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative from the start of the data, a negative offset
              is relative from the end of the data.
      is_bound: boolean value to indicate the signature must be bound to
                the offset, False by default.
    """
    signature = Signature(expression, offset=offset, is_bound=is_bound)
    self.signatures.append(signature)

  def AddUniversalTypeIdentifier(self, universal_type_identifiers):
    """Adds a Universal Type Identifier (UTI) to the specification."""
    self.universal_type_identifiers.append(universal_type_identifiers)
class SpecificationStore(object):
  """Class that serves as a store for specifications."""

  def __init__(self):
    """Initializes the specification store."""
    # Maps a format identifier to its Specification instance.
    self._format_specifications = {}

  @property
  def specifications(self):
    """A specifications iterator object."""
    return self._format_specifications.itervalues()

  def AddNewSpecification(self, identifier):
    """Adds a new specification.

    Args:
      identifier: a string containing the format identifier,
                  which should be unique for the store.

    Returns:
      An instance of Specification.

    Raises:
      ValueError: if the store already contains a specification with
                  the same identifier.
    """
    if identifier in self._format_specifications:
      raise ValueError("specification {0:s} is already defined in "
                       "store.".format(identifier))

    self._format_specifications[identifier] = Specification(identifier)

    return self._format_specifications[identifier]

  def AddSpecification(self, specification):
    """Adds a specification.

    Args:
      specification: the specification (instance of Specification).

    Raises:
      KeyError: if the store already contains a specification with
                the same identifier.
    """
    if specification.identifier in self._format_specifications:
      raise KeyError(
          u'Specification {0:s} is already defined in store.'.format(
              specification.identifier))

    self._format_specifications[specification.identifier] = specification

  def ReadFromFileObject(self, unused_file_object):
    """Reads the specification store from a file-like object.

    Args:
      unused_file_object: A file-like object.

    Raises:
      RuntimeError: because functionality is not implemented yet.
    """
    # TODO: implement this function.
    raise RuntimeError(u'Function not implemented.')

  def ReadFromFile(self, filename):
    """Reads the specification store from a file.

    Args:
      filename: The name of the file.
    """
    # Use a context manager so the file is closed even when
    # ReadFromFileObject raises (it currently always raises RuntimeError).
    with open(filename, 'r') as file_object:
      self.ReadFromFileObject(file_object)
+46
View File
@@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the format specification classes."""
import unittest
from plaso.classifier import specification
class SpecificationStoreTest(unittest.TestCase):
  """Class to test the specification store."""

  def testAddSpecification(self):
    """Function to test the add specification function."""
    store = specification.SpecificationStore()

    regf_specification = specification.Specification('REGF')
    regf_specification.AddNewSignature('regf', offset=0)

    esedb_specification = specification.Specification('ESEDB')
    esedb_specification.AddNewSignature('\xef\xcd\xab\x89', offset=4)

    for format_specification in [regf_specification, esedb_specification]:
      store.AddSpecification(format_specification)

    # Adding a specification with an identifier already in the store
    # should raise KeyError.
    with self.assertRaises(KeyError):
      store.AddSpecification(regf_specification)
# Allow running the tests directly as a script.
if __name__ == '__main__':
  unittest.main()
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared test cases."""
from plaso.classifier import specification
def CreateSpecificationStore():
  """Creates a format specification store for testing purposes.

  Returns:
    A format specification store (instance of SpecificationStore).
  """
  store = specification.SpecificationStore()

  test_specification = store.AddNewSpecification('7zip')
  test_specification.AddMimeType('application/x-7z-compressed')
  test_specification.AddUniversalTypeIdentifier('org.7-zip.7-zip-archive')
  test_specification.AddNewSignature('7z\xbc\xaf\x27\x1c', offset=0)

  test_specification = store.AddNewSpecification('esedb')
  test_specification.AddNewSignature(
      '\xef\xcd\xab\x89', offset=4, is_bound=True)

  test_specification = store.AddNewSpecification('evt')
  test_specification.AddNewSignature(
      '\x30\x00\x00\x00LfLe\x01\x00\x00\x00\x01\x00\x00\x00', offset=0,
      is_bound=True)

  test_specification = store.AddNewSpecification('evtx')
  test_specification.AddNewSignature('ElfFile\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('ewf')
  test_specification.AddNewSignature(
      'EVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)

  # Bug fix: the original created this specification directly with
  # specification.Specification() and never added it to the store, so the
  # ewf_logical signature was silently dropped.
  test_specification = store.AddNewSpecification('ewf_logical')
  test_specification.AddNewSignature(
      'LVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('lnk')
  test_specification.AddNewSignature(
      '\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
      '\x00\x00\x00\x46', offset=0)

  test_specification = store.AddNewSpecification('msiecf_index_dat')
  test_specification.AddNewSignature(
      'Client UrlCache MMF Ver ', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('nk2')
  test_specification.AddNewSignature(
      '\x0d\xf0\xad\xba\xa0\x00\x00\x00\x01\x00\x00\x00', offset=0,
      is_bound=True)

  test_specification = store.AddNewSpecification('olecf')
  test_specification.AddNewSignature(
      '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1', offset=0, is_bound=True)
  test_specification.AddNewSignature(
      '\x0e\x11\xfc\x0d\xd0\xcf\x11\x0e', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('pff')
  test_specification.AddNewSignature('!BDN', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('qcow')
  test_specification.AddNewSignature('QFI\xfb', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('rar')
  test_specification.AddMimeType('application/x-rar-compressed')
  test_specification.AddUniversalTypeIdentifier('com.rarlab.rar-archive')
  test_specification.AddNewSignature(
      'Rar!\x1a\x07\x00', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('regf')
  test_specification.AddNewSignature('regf', offset=0, is_bound=True)

  # NOTE(review): 'thumbache' looks like a typo for 'thumbcache' but the
  # identifiers are runtime strings and are kept as-is to avoid changing
  # behavior for existing callers.
  test_specification = store.AddNewSpecification('thumbache_db_cache')
  test_specification.AddNewSignature('CMMM', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('thumbache_db_index')
  test_specification.AddNewSignature('IMMM', offset=0, is_bound=True)

  test_specification = store.AddNewSpecification('zip')
  test_specification.AddMimeType('application/zip')
  test_specification.AddUniversalTypeIdentifier('com.pkware.zip-archive')

  # WinZip 8 signature.
  test_specification.AddNewSignature('PK00', offset=0, is_bound=True)
  test_specification.AddNewSignature('PK\x01\x02')
  test_specification.AddNewSignature('PK\x03\x04', offset=0)
  test_specification.AddNewSignature('PK\x05\x05')

  # Will be at offset 0 when the archive is empty.
  test_specification.AddNewSignature('PK\x05\x06', offset=-22, is_bound=True)

  test_specification.AddNewSignature('PK\x06\x06')
  test_specification.AddNewSignature('PK\x06\x07')
  test_specification.AddNewSignature('PK\x06\x08')

  # Will be at offset 0 when this is spanned archive.
  test_specification.AddNewSignature('PK\x07\x08')

  return store