plaso-rubanetra/plaso/classifier/patterns.py
2020-04-06 18:48:34 +02:00

309 lines
9.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The patterns classes used by the scan tree-based format scanner."""
class _ByteValuePatterns(object):
    """Table entry that maps one byte value onto the patterns added for it.

    The scan tree-based format scanner uses these entries to go from
    a byte value to one or more patterns.

    Attributes:
        byte_value: the byte value the entry was created for.
        patterns: dictionary of the added patterns, keyed by pattern identifier.
    """

    def __init__(self, byte_value):
        """Initializes the entry for a specific byte value.

        Args:
            byte_value: the byte value that maps to the patterns of this entry.
        """
        super(_ByteValuePatterns, self).__init__()
        self.patterns = {}
        self.byte_value = byte_value

    def __unicode__(self):
        """Retrieves a string representation of the byte value patterns."""
        byte_value_as_integer = ord(self.byte_value)
        return u'0x{0:02x} {1!s}'.format(byte_value_as_integer, self.patterns)

    def AddPattern(self, pattern):
        """Registers a pattern under its identifier.

        Args:
            pattern: the pattern (instance of Pattern).

        Raises:
            ValueError: if the entry already contains a pattern with
                        the same identifier.
        """
        pattern_identifier = pattern.identifier
        if pattern_identifier in self.patterns:
            raise ValueError(
                u'Pattern {0:s} is already defined.'.format(pattern_identifier))

        self.patterns[pattern_identifier] = pattern

    def ToDebugString(self, indentation_level=1):
        """Converts the byte value patterns into a debug string.

        Args:
            indentation_level: optional number of times the indentation string
                               is repeated in front of every line.

        Returns:
            A string with one line for the byte value, one line per pattern
            identifier and a trailing empty line.
        """
        indent = u' ' * indentation_level

        lines = [u'{0:s}byte value: 0x{1:02x}\n'.format(
            indent, ord(self.byte_value))]
        for pattern_identifier in self.patterns:
            lines.append(u'{0:s} patterns: {1:s}\n'.format(
                indent, pattern_identifier))
        lines.append(u'\n')

        return u''.join(lines)
class _SkipTable(object):
    """Skip table that maps byte values onto skip values.

    The skip table is used in the scan tree-based format scanner to determine
    the skip value for the BoyerMooreHorspool search.

    Attributes:
        skip_pattern_length: the (maximum) skip pattern length, which is also
                             the skip value returned for unknown byte values.
    """

    def __init__(self, skip_pattern_length):
        """Initializes an empty skip table.

        Args:
            skip_pattern_length: the (maximum) skip pattern length.
        """
        super(_SkipTable, self).__init__()
        self.skip_pattern_length = skip_pattern_length
        self._skip_value_per_byte_value = {}

    def __getitem__(self, key):
        """Retrieves a specific skip value.

        Args:
            key: the byte value within the skip table.

        Returns:
            The skip value for the key or the maximum skip value
            if no corresponding key was found.
        """
        return self._skip_value_per_byte_value.get(key, self.skip_pattern_length)

    def SetSkipValue(self, byte_value, skip_value):
        """Sets a skip value, keeping the smallest one seen per byte value.

        Args:
            byte_value: the corresponding byte value.
            skip_value: the number of bytes to skip.

        Raises:
            ValueError: if byte value or skip value is out of bounds.
        """
        if byte_value < 0 or byte_value > 255:
            raise ValueError(u'Invalid byte value, value out of bounds.')

        if skip_value < 0 or skip_value >= self.skip_pattern_length:
            raise ValueError(u'Invalid skip value, value out of bounds.')

        # An existing entry is only replaced by a strictly smaller skip value.
        current_skip_value = self._skip_value_per_byte_value.get(byte_value)
        if current_skip_value is None or current_skip_value > skip_value:
            self._skip_value_per_byte_value[byte_value] = skip_value

    def ToDebugString(self):
        """Converts the skip table into a debug string.

        Returns:
            A string with a header line, one line per stored byte value,
            a line with the default skip value and a trailing empty line.
        """
        lines = [u'Byte value\tSkip value\n']
        for byte_value, skip_value in self._skip_value_per_byte_value.items():
            lines.append(u'0x{0:02x}\t{1:d}\n'.format(byte_value, skip_value))
        lines.append(u'Default\t{0:d}\n'.format(self.skip_pattern_length))
        lines.append(u'\n')

        return u''.join(lines)
class Pattern(object):
    """A signature of a specification, addressable by a unique identifier.

    Attributes:
        signature: the signature (instance of Signature).
        specification: the specification (instance of Specification) that
                       contains the signature.
    """

    def __init__(self, signature_index, signature, specification):
        """Initializes the pattern.

        Args:
            signature_index: the index of the signature within the specification.
            signature: the signature (instance of Signature).
            specification: the specification (instance of Specification) that
                           contains the signature.
        """
        super(Pattern, self).__init__()
        self.specification = specification
        self.signature = signature
        self._signature_index = signature_index

    def __unicode__(self):
        """Retrieves a string representation, which is the identifier."""
        return self.identifier

    @property
    def expression(self):
        """The signature expression."""
        signature = self.signature
        return signature.expression

    @property
    def identifier(self):
        """The identifier: specification identifier, underscore, signature index."""
        # Using _ here because some scanner implementation are limited to what
        # characters can be used in the identifiers.
        specification_identifier = self.specification.identifier
        return u'{0:s}_{1:d}'.format(
            specification_identifier, self._signature_index)

    @property
    def offset(self):
        """The signature offset."""
        signature = self.signature
        return signature.offset

    @property
    def is_bound(self):
        """Boolean value to indicate the signature is bound to an offset."""
        signature = self.signature
        return signature.is_bound
class PatternTable(object):
    """Class that implements a pattern table.

    The pattern table is used in the scan tree-based format scanner
    to construct a scan tree. It contains either unbound patterns or
    patterns bound to a specific offset.

    Attributes:
        largest_pattern_length: the length of the longest pattern expression
                                in the table, or 0 if the table is empty.
        largest_pattern_offset: the largest start offset of a pattern in
                                the table.
        patterns: list of the patterns that were added to the table.
        smallest_pattern_length: the length of the shortest pattern expression
                                 in the table, or 0 if the table is empty.
        smallest_pattern_offset: the smallest start offset of a pattern in
                                 the table, starting from 0.
    """

    def __init__(self, patterns, ignore_list, is_bound=None):
        """Initializes and builds the patterns table from patterns.

        Args:
            patterns: a list of the patterns.
            ignore_list: a list of pattern offsets to ignore.
            is_bound: optional boolean value to indicate if the signatures are
                      bound to offsets. The default is None, which means the
                      value should be ignored and both bound and unbound
                      patterns are considered unbound.

        Raises:
            ValueError: if a signature pattern is too small to be useful (< 4).
        """
        super(PatternTable, self).__init__()
        self._byte_values_per_offset = {}
        self.largest_pattern_length = 0
        self.largest_pattern_offset = 0
        self.patterns = []
        self.smallest_pattern_length = 0
        self.smallest_pattern_offset = 0

        for pattern in patterns:
            # When is_bound is set, only patterns of the requested kind are kept.
            if is_bound is not None and pattern.signature.is_bound != is_bound:
                continue

            pattern_length = len(pattern.expression)

            if pattern_length < 4:
                raise ValueError(u'Pattern too small to be useful.')

            # The first accepted pattern seeds the minimum. Taking min() against
            # the initial 0 would pin the value at 0 and GetSkipTable() would
            # then build a skip table without any entries.
            if not self.patterns:
                self.smallest_pattern_length = pattern_length
            else:
                self.smallest_pattern_length = min(
                    self.smallest_pattern_length, pattern_length)

            self.largest_pattern_length = max(
                self.largest_pattern_length, pattern_length)

            self.patterns.append(pattern)

            self._AddPattern(pattern, ignore_list, is_bound)

    def _AddPattern(self, pattern, ignore_list, is_bound):
        """Adds the byte values per offset in the pattern to the table.

        Every offset covered by the pattern gets an entry in the table; byte
        values are only recorded for offsets that are not in the ignore list.

        Args:
            pattern: the pattern (instance of Pattern).
            ignore_list: a list of pattern offsets to ignore.
            is_bound: boolean value to indicate if the signatures are bound
                      to offsets. A value of None indicates that the value
                      should be ignored and both bound and unbound patterns
                      are considered unbound.
        """
        pattern_offset = pattern.offset if is_bound else 0

        # NOTE(review): smallest_pattern_offset starts at 0, so with
        # non-negative offsets it never rises above 0. Left unchanged because
        # no consumer of the value is visible here - confirm the intent.
        self.smallest_pattern_offset = min(
            self.smallest_pattern_offset, pattern_offset)
        self.largest_pattern_offset = max(
            self.largest_pattern_offset, pattern_offset)

        for byte_value in pattern.expression:
            byte_values = self._byte_values_per_offset.setdefault(
                pattern_offset, {})

            if pattern_offset not in ignore_list:
                if byte_value not in byte_values:
                    byte_values[byte_value] = _ByteValuePatterns(byte_value)

                byte_values[byte_value].AddPattern(pattern)

            pattern_offset += 1

    @property
    def offsets(self):
        """The offsets."""
        return self._byte_values_per_offset.keys()

    def GetByteValues(self, pattern_offset):
        """Returns the bytes values for a specific pattern offset.

        Args:
            pattern_offset: the pattern offset.

        Returns:
            A dictionary of the byte value patterns (instances of
            _ByteValuePatterns) at the offset, keyed by byte value.

        Raises:
            KeyError: if the table has no entry for the pattern offset.
        """
        return self._byte_values_per_offset[pattern_offset]

    def GetSkipTable(self):
        """Retrieves the skip table for the patterns in the table.

        Only the first smallest_pattern_length byte values of every pattern
        are used; the first one gets the largest skip value and the last one
        a skip value of 0.

        Returns:
            The skip table (instance of _SkipTable).
        """
        skip_pattern_length = self.smallest_pattern_length
        skip_table = _SkipTable(skip_pattern_length)

        for pattern in self.patterns:
            expression = pattern.expression
            if not expression:
                continue

            for expression_index in range(skip_pattern_length):
                skip_value = skip_pattern_length - 1 - expression_index
                skip_table.SetSkipValue(
                    ord(expression[expression_index]), skip_value)

        return skip_table

    def ToDebugString(self):
        """Converts the pattern table into a debug string.

        Returns:
            A string with a header line, one line per pattern offset listing
            the byte values and pattern identifiers, and a trailing empty line.
        """
        parts = [u'Pattern offset\tByte value(s)\n']

        for pattern_offset, byte_values in self._byte_values_per_offset.items():
            parts.append(u'{0:d}'.format(pattern_offset))

            for byte_value, byte_value_patterns in byte_values.items():
                identifiers = u', '.join(byte_value_patterns.patterns)
                parts.append(u'\t0x{0:02x} ({1:s})'.format(
                    ord(byte_value), identifiers))

            parts.append(u'\n')

        parts.append(u'\n')
        return u''.join(parts)