309 lines
9.8 KiB
Python
309 lines
9.8 KiB
Python
#!/usr/bin/python
|
||
# -*- coding: utf-8 -*-
|
||
#
|
||
# Copyright 2013 The Plaso Project Authors.
|
||
# Please see the AUTHORS file for details on individual authors.
|
||
#
|
||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
# you may not use this file except in compliance with the License.
|
||
# You may obtain a copy of the License at
|
||
#
|
||
# http://www.apache.org/licenses/LICENSE-2.0
|
||
#
|
||
# Unless required by applicable law or agreed to in writing, software
|
||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
# See the License for the specific language governing permissions and
|
||
# limitations under the License.
|
||
"""The patterns classes used by the scan tree-based format scanner."""
|
||
|
||
|
||
class _ByteValuePatterns(object):
|
||
"""Class that implements a mapping between byte value and patterns.
|
||
|
||
The byte value patterns are used in the scan tree-based format scanner
|
||
to map a byte value to one or more patterns.
|
||
"""
|
||
|
||
def __init__(self, byte_value):
|
||
"""Initializes the pattern table (entry) byte value.
|
||
|
||
Args:
|
||
byte_value: the byte value that maps the patterns in the table.
|
||
"""
|
||
super(_ByteValuePatterns, self).__init__()
|
||
self.byte_value = byte_value
|
||
self.patterns = {}
|
||
|
||
def __unicode__(self):
|
||
"""Retrieves a string representation of the byte value patterns."""
|
||
return u'0x{0:02x} {1!s}'.format(ord(self.byte_value), self.patterns)
|
||
|
||
def AddPattern(self, pattern):
|
||
"""Adds a pattern.
|
||
|
||
Args:
|
||
pattern: the pattern (instance of Pattern).
|
||
|
||
Raises:
|
||
ValueError: if the table entry already contains a pattern
|
||
with the same identifier.
|
||
"""
|
||
if pattern.identifier in self.patterns:
|
||
raise ValueError(u'Pattern {0:s} is already defined.'.format(
|
||
pattern.identifier))
|
||
|
||
self.patterns[pattern.identifier] = pattern
|
||
|
||
def ToDebugString(self, indentation_level=1):
|
||
"""Converts the byte value pattern into a debug string."""
|
||
indentation = u' ' * indentation_level
|
||
|
||
header = u'{0:s}byte value: 0x{1:02x}\n'.format(
|
||
indentation, ord(self.byte_value))
|
||
|
||
entries = u''.join([u'{0:s} patterns: {1:s}\n'.format(
|
||
indentation, identifier) for identifier in self.patterns])
|
||
|
||
return u''.join([header, entries, u'\n'])
|
||
|
||
|
||
class _SkipTable(object):
|
||
"""Class that implements a skip table.
|
||
|
||
The skip table is used in the scan tree-based format scanner to determine
|
||
the skip value for the Boyer–Moore–Horspool search.
|
||
"""
|
||
|
||
def __init__(self, skip_pattern_length):
|
||
"""Initializes the skip table.
|
||
|
||
Args:
|
||
skip_pattern_length: the (maximum) skip pattern length.
|
||
"""
|
||
super(_SkipTable, self).__init__()
|
||
self._skip_value_per_byte_value = {}
|
||
self.skip_pattern_length = skip_pattern_length
|
||
|
||
def __getitem__(self, key):
|
||
"""Retrieves a specific skip value.
|
||
|
||
Args:
|
||
key: the byte value within the skip table.
|
||
|
||
Returns:
|
||
the skip value for the key or the maximim skip value
|
||
if no corresponding key was found.
|
||
"""
|
||
if key in self._skip_value_per_byte_value:
|
||
return self._skip_value_per_byte_value[key]
|
||
return self.skip_pattern_length
|
||
|
||
def SetSkipValue(self, byte_value, skip_value):
|
||
"""Sets a skip value.
|
||
|
||
Args:
|
||
byte_value: the corresponding byte value.
|
||
skip_value: the number of bytes to skip.
|
||
|
||
Raises:
|
||
ValueError: if byte value or skip value is out of bounds.
|
||
"""
|
||
if byte_value < 0 or byte_value > 255:
|
||
raise ValueError(u'Invalid byte value, value out of bounds.')
|
||
|
||
if skip_value < 0 or skip_value >= self.skip_pattern_length:
|
||
raise ValueError(u'Invalid skip value, value out of bounds.')
|
||
|
||
if (not byte_value in self._skip_value_per_byte_value or
|
||
self._skip_value_per_byte_value[byte_value] > skip_value):
|
||
self._skip_value_per_byte_value[byte_value] = skip_value
|
||
|
||
def ToDebugString(self):
|
||
"""Converts the skip table into a debug string."""
|
||
header = u'Byte value\tSkip value\n'
|
||
|
||
entries = u''.join([u'0x{0:02x}\t{1:d}\n'.format(
|
||
byte_value, self._skip_value_per_byte_value[byte_value])
|
||
for byte_value in self._skip_value_per_byte_value])
|
||
|
||
default = u'Default\t{0:d}\n'.format(self.skip_pattern_length)
|
||
|
||
return u''.join([header, entries, default, u'\n'])
|
||
|
||
|
||
class Pattern(object):
|
||
"""Class that implements a pattern."""
|
||
|
||
def __init__(self, signature_index, signature, specification):
|
||
"""Initializes the pattern.
|
||
|
||
Args:
|
||
signature_index: the index of the signature within the specification.
|
||
signature: the signature (instance of Signature).
|
||
specification: the specification (instance of Specification) that
|
||
contains the signature.
|
||
"""
|
||
super(Pattern, self).__init__()
|
||
self._signature_index = signature_index
|
||
self.signature = signature
|
||
self.specification = specification
|
||
|
||
def __unicode__(self):
|
||
"""Retrieves a string representation."""
|
||
return self.identifier
|
||
|
||
@property
|
||
def expression(self):
|
||
"""The signature expression."""
|
||
return self.signature.expression
|
||
|
||
@property
|
||
def identifier(self):
|
||
"""The identifier."""
|
||
# Using _ here because some scanner implementation are limited to what
|
||
# characters can be used in the identifiers.
|
||
return u'{0:s}_{1:d}'.format(
|
||
self.specification.identifier, self._signature_index)
|
||
|
||
@property
|
||
def offset(self):
|
||
"""The signature offset."""
|
||
return self.signature.offset
|
||
|
||
@property
|
||
def is_bound(self):
|
||
"""Boolean value to indicate the signature is bound to an offset."""
|
||
return self.signature.is_bound
|
||
|
||
|
||
class PatternTable(object):
|
||
"""Class that implements a pattern table.
|
||
|
||
The pattern table is used in the the scan tree-based format scanner
|
||
to construct a scan tree. It contains either unbound patterns or
|
||
patterns bound to a specific offset.
|
||
"""
|
||
|
||
def __init__(self, patterns, ignore_list, is_bound=None):
|
||
"""Initializes and builds the patterns table from patterns.
|
||
|
||
Args:
|
||
patterns: a list of the patterns.
|
||
ignore_list: a list of pattern offsets to ignore.
|
||
is_bound: optional boolean value to indicate if the signatures are bound
|
||
to offsets. The default is None, which means the value should
|
||
be ignored and both bound and unbound patterns are considered
|
||
unbound.
|
||
|
||
Raises:
|
||
ValueError: if a signature pattern is too small to be useful (< 4).
|
||
"""
|
||
super(PatternTable, self).__init__()
|
||
self._byte_values_per_offset = {}
|
||
self.largest_pattern_length = 0
|
||
self.largest_pattern_offset = 0
|
||
self.patterns = []
|
||
self.smallest_pattern_length = 0
|
||
self.smallest_pattern_offset = 0
|
||
|
||
for pattern in patterns:
|
||
if is_bound is not None and pattern.signature.is_bound != is_bound:
|
||
continue
|
||
|
||
pattern_length = len(pattern.expression)
|
||
|
||
if pattern_length < 4:
|
||
raise ValueError(u'Pattern too small to be useful.')
|
||
|
||
self.smallest_pattern_length = min(
|
||
self.smallest_pattern_length, pattern_length)
|
||
self.largest_pattern_length = max(
|
||
self.largest_pattern_length, pattern_length)
|
||
|
||
self.patterns.append(pattern)
|
||
|
||
self._AddPattern(pattern, ignore_list, is_bound)
|
||
|
||
def _AddPattern(self, pattern, ignore_list, is_bound):
|
||
"""Adds the byte values per offset in the pattern to the table.
|
||
|
||
Args:
|
||
pattern: the pattern (instance of Pattern).
|
||
ignore_list: a list of pattern offsets to ignore.
|
||
is_bound: boolean value to indicate if the signatures are bound
|
||
to offsets. A value of None indicates that the value should
|
||
be ignored and both bound and unbound patterns are considered
|
||
unbound.
|
||
"""
|
||
pattern_offset = pattern.offset if is_bound else 0
|
||
|
||
self.smallest_pattern_offset = min(
|
||
self.smallest_pattern_offset, pattern_offset)
|
||
self.largest_pattern_offset = max(
|
||
self.largest_pattern_offset, pattern_offset)
|
||
|
||
for byte_value in pattern.expression:
|
||
if pattern_offset not in self._byte_values_per_offset:
|
||
self._byte_values_per_offset[pattern_offset] = {}
|
||
|
||
if pattern_offset not in ignore_list:
|
||
byte_values = self._byte_values_per_offset[pattern_offset]
|
||
|
||
if byte_value not in byte_values:
|
||
byte_values[byte_value] = _ByteValuePatterns(byte_value)
|
||
|
||
byte_value_patterns = byte_values[byte_value]
|
||
|
||
byte_value_patterns.AddPattern(pattern)
|
||
|
||
pattern_offset += 1
|
||
|
||
@property
|
||
def offsets(self):
|
||
"""The offsets."""
|
||
return self._byte_values_per_offset.keys()
|
||
|
||
def GetByteValues(self, pattern_offset):
|
||
"""Returns the bytes values for a specific pattern offset."""
|
||
return self._byte_values_per_offset[pattern_offset]
|
||
|
||
def GetSkipTable(self):
|
||
"""Retrieves the skip table for the patterns in the table.
|
||
|
||
Returns:
|
||
The skip table (instance of SkipTable).
|
||
"""
|
||
skip_table = _SkipTable(self.smallest_pattern_length)
|
||
|
||
for pattern in self.patterns:
|
||
if pattern.expression:
|
||
skip_value = self.smallest_pattern_length
|
||
|
||
for expression_index in range(0, self.smallest_pattern_length):
|
||
skip_value -= 1
|
||
skip_table.SetSkipValue(
|
||
ord(pattern.expression[expression_index]), skip_value)
|
||
|
||
return skip_table
|
||
|
||
def ToDebugString(self):
|
||
"""Converts the pattern table into a debug string."""
|
||
header = u'Pattern offset\tByte value(s)\n'
|
||
entries = u''
|
||
|
||
for pattern_offset in self._byte_values_per_offset:
|
||
entries += u'{0:d}'.format(pattern_offset)
|
||
|
||
byte_values = self._byte_values_per_offset[pattern_offset]
|
||
|
||
for byte_value in byte_values:
|
||
identifiers = u', '.join(
|
||
[identifier for identifier in byte_values[byte_value].patterns])
|
||
|
||
entries += u'\t0x{0:02x} ({1:s})'.format(ord(byte_value), identifiers)
|
||
|
||
entries += u'\n'
|
||
|
||
return u''.join([header, entries, u'\n'])
|