plaso-rubanetra/plaso/lib/objectfilter.py

#!/usr/bin/env python
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Originally copied from the GRR project:
# http://code.google.com/p/grr/source/browse/lib/objectfilter.py
# Copied on 11/15/2012
# Minor changes made to make it work in plaso.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes to perform filtering of objects based on their data members.

Given a list of objects and a textual filter expression, these classes allow
you to determine which objects match the filter. The system has two main
pieces: A parser for the supported grammar and a filter implementation.

Given any complying user-supplied grammar, it is parsed with a custom lexer
based on GRR's lexer and then compiled into an actual implementation by using
the filter implementation. A filter implementation simply provides actual
implementations for the primitives required to perform filtering. The compiled
result is always a class supporting the Filter interface.

If we define a class called Car such as:


class Car(object):
  def __init__(self, code, color="white", doors=3):
    self.code = code
    self.color = color
    self.doors = doors

And we have two instances:

  ford_ka = Car("FORDKA1", color="grey")
  toyota_corolla = Car("COROLLA1", color="white", doors=5)
  fleet = [ford_ka, toyota_corolla]

We want to find cars that are grey and have 3 or more doors. We could filter
our fleet like this:

  criteria = "(color is grey) and (doors >= 3)"
  parser = Parser(criteria).Parse()
  compiled_filter = parser.Compile(LowercaseAttributeFilterImplementation)

  for car in fleet:
    if compiled_filter.Matches(car):
      print "Car %s matches the supplied filter." % car.code

The filter expression contains two subexpressions joined by an AND operator:
  "color is grey" and "doors >= 3"
This means we want to search for objects matching these two subexpressions.
Let's analyze the first one in depth "color is grey":

  "color": the left operand specifies a search path to look for the data. This
  tells our filtering system to look for the color property on passed objects.
  "is": the operator. Values retrieved for the "color" property will be checked
  against the right operand to see if they are equal.
  "grey": the right operand. It specifies an explicit value to check for.

So each time an object is passed through the filter, it will expand the value
of the color data member, and compare its value against "grey".

Because data members of objects are often not simple datatypes but other
objects, the system allows you to reference data members within other data
members by separating each by a dot. Let's see an example:

Let's add a more complex Car class with default tyre data:


class CarWithTyres(Car):
  def __init__(self, code, tyres=None, color="white", doors=3):
    super(CarWithTyres, self).__init__(code, color, doors)
    self.tyres = tyres or Tyre("Pirelli", "PZERO")


class Tyre(object):
  def __init__(self, brand, code):
    self.brand = brand
    self.code = code

And two new instances:
  ford_ka = CarWithTyres("FORDKA", color="grey", tyres=Tyre("AVON", "ZT5"))
  toyota_corolla = Car("COROLLA1", color="white", doors=5)
  fleet = [ford_ka, toyota_corolla]

To filter a car based on the tyre brand, we would use a search path of
"tyres.brand".

Because the filter implementation provides the actual classes that perform
handling of the search paths, operators, etc. customizing the behaviour of the
filter is easy. Three basic filter implementations are given:
  BaseFilterImplementation: search path expansion is done on attribute names
  as provided (case-sensitive).
  LowercaseAttributeFilterImplementation: search path expansion is done on
  the lowercased attribute name, so that it only accesses attributes, not
  methods.
  DictFilterImplementation: search path expansion is done on dictionary access
  to the given object. So "a.b" expands the object obj to obj["a"]["b"]
"""

import abc
import binascii
import logging
import re

from plaso.lib import lexer
from plaso.lib import utils


class Error(Exception):
  """Base class for the exceptions defined in this module."""


class MalformedQueryError(Error):
  """Error to signal that the provided filter query is malformed."""


class ParseError(Error):
  """Error raised when a textual query cannot be parsed or compiled."""


class InvalidNumberOfOperands(Error):
  """Error raised when an operator receives the wrong number of operands."""


class Filter(object):
  """Base class for every filter.

  Attributes:
    value_expander_cls: the value expander class passed to the constructor,
        or None when no expander was given.
    value_expander: an instance of value_expander_cls, or None when no
        expander was given.
    args: the list of arguments of the filter (an empty list when none were
        given).
  """

  def __init__(self, arguments=None, value_expander=None):
    """Constructor.

    Args:
      arguments: Arguments to the filter.
      value_expander: A class that will be instantiated and used to expand
      values for the objects passed to this filter. Expander implementations
      are provided by subclassing ValueExpander.

    Raises:
      Error: If the given value_expander is not a subclass of ValueExpander
    """
    self.value_expander = None
    self.value_expander_cls = value_expander
    if self.value_expander_cls:
      if not issubclass(self.value_expander_cls, ValueExpander):
        # The offending value is a class, not a string, hence "!s".
        raise Error(u'{0!s} is not a valid value expander'.format(
            self.value_expander_cls))
      self.value_expander = self.value_expander_cls()
    self.args = arguments or []
    # Note that arguments is a list or None, not a string, hence "!s".
    logging.debug(u'Adding {0!s}'.format(arguments))

  # Note that this class does not use abc.ABCMeta as its metaclass, so the
  # decorator documents the intent but is not enforced on instantiation.
  @abc.abstractmethod
  def Matches(self, obj):
    """Whether object obj matches this filter."""

  def Filter(self, objects):
    """Returns the objects for which Matches() is true."""
    return filter(self.Matches, objects)

  def __str__(self):
    """Returns the class name followed by the stringified arguments."""
    return '{0:s}({1:s})'.format(
        self.__class__.__name__, ', '.join([str(arg) for arg in self.args]))


class AndFilter(Filter):
  """Boolean AND over the Filter instances given as arguments.

  An object matches only when every child filter matches it. With no child
  filters at all, every object matches.
  """
  def Matches(self, obj):
    """Whether all child filters match obj."""
    return all(child.Matches(obj) for child in self.args)


class OrFilter(Filter):
  """Boolean OR over the Filter instances given as arguments.

  An object matches when at least one child filter matches it. With no child
  filters at all, every object matches.
  """
  def Matches(self, obj):
    """Whether at least one child filter matches obj."""
    children = self.args
    if not children:
      # No conditions: everything passes.
      return True
    return any(child.Matches(obj) for child in children)


# pylint: disable=abstract-method
class Operator(Filter):
  """Base class for all operators; adds no behaviour on top of Filter."""


class IdentityFilter(Operator):
  """Filter that matches every object it is given."""
  def Matches(self, _):
    """Returns True regardless of the object passed."""
    return True


class UnaryOperator(Operator):
  """Base class for operators that take exactly one operand."""

  def __init__(self, operand, **kwargs):
    """Constructor.

    Args:
      operand: the single operand, stored as the only item of self.args.
      kwargs: passed through to the parent constructor.

    Raises:
      InvalidNumberOfOperands: if self.args does not hold exactly one item.
    """
    super(UnaryOperator, self).__init__(arguments=[operand], **kwargs)
    operand_count = len(self.args)
    if operand_count == 1:
      return

    raise InvalidNumberOfOperands(
        u'Only one operand is accepted by {0:s}. Received {1:d}.'.format(
            self.__class__.__name__, operand_count))


class BinaryOperator(Operator):
  """Base class for binary operators.

  The left operand is always a path into the object which will be expanded for
  values. The right operand is a value defined at initialization and is stored
  at self.right_operand.

  Attributes:
    left_operand: the first argument, the path to expand.
    right_operand: the second argument, the value to compare against.
  """
  def __init__(self, arguments=None, **kwargs):
    """Constructor.

    Args:
      arguments: a list with exactly two items, the left and right operand.
      kwargs: passed through to the parent constructor.

    Raises:
      InvalidNumberOfOperands: if the number of arguments is not two.
    """
    super(BinaryOperator, self).__init__(arguments=arguments, **kwargs)
    if len(self.args) != 2:
      # The count is an integer: formatting it with "s" raises ValueError,
      # which would hide the intended exception, so "d" is used.
      raise InvalidNumberOfOperands(
          u'Only two operands are accepted by {0:s}. Received {1:d}.'.format(
              self.__class__.__name__, len(self.args)))

    self.left_operand = self.args[0]
    self.right_operand = self.args[1]


class GenericBinaryOperator(BinaryOperator):
  """Binary operator skeleton; subclasses only need to supply Operation().

  Attributes:
    bool_value: the value Matches() returns when the operation holds for at
        least one expanded value; its negation is returned otherwise.
  """

  def __init__(self, **kwargs):
    """Initializes the operator with positive matching."""
    super(GenericBinaryOperator, self).__init__(**kwargs)
    self.bool_value = True

  def FlipBool(self):
    """Inverts the result reported by Matches()."""
    logging.debug(u'Negative matching.')
    self.bool_value = not self.bool_value

  def Operation(self, x, y):
    """Performs the operation between two values.

    The base implementation does nothing and therefore returns None.

    Args:
      x: a value expanded from the object being matched.
      y: the right operand.
    """

  def Operate(self, values):
    """Whether the operation holds for at least one of the values.

    Values for which the operation raises ValueError or TypeError are
    skipped.

    Args:
      values: an iterable of values expanded from the object.

    Returns:
      True if Operation() is true for any value, False otherwise.
    """
    for candidate in values:
      try:
        if self.Operation(candidate, self.right_operand):
          return True
      except (ValueError, TypeError):
        pass
    return False

  def Matches(self, obj):
    """Whether obj matches, taking a possible negation into account."""
    expanded = self.value_expander.Expand(obj, self.left_operand)
    if not expanded or not self.Operate(expanded):
      return not self.bool_value
    return self.bool_value


class Equals(GenericBinaryOperator):
  """Matches objects when the right operand equals the expanded value."""

  def Operation(self, x, y):
    """Whether the expanded value x == right operand y."""
    return x == y


class NotEquals(Equals):
  """Matches when the right operand isn't equal to the expanded value.

  Reuses the equality operation of Equals and starts with bool_value set to
  False, so the result of Matches() is negated.
  """

  def __init__(self, **kwargs):
    """Initializes the operator with negative matching."""
    super(NotEquals, self).__init__(**kwargs)
    self.bool_value = False


class Less(GenericBinaryOperator):
  """Whether the expanded value < right_operand."""

  def Operation(self, x, y):
    """Whether the expanded value x < right operand y."""
    return x < y


class LessEqual(GenericBinaryOperator):
  """Whether the expanded value <= right_operand."""

  def Operation(self, x, y):
    """Whether the expanded value x <= right operand y."""
    return x <= y


class Greater(GenericBinaryOperator):
  """Whether the expanded value > right_operand."""

  def Operation(self, x, y):
    """Whether the expanded value x > right operand y."""
    return x > y


class GreaterEqual(GenericBinaryOperator):
  """Whether the expanded value >= right_operand."""

  def Operation(self, x, y):
    """Whether the expanded value x >= right operand y."""
    return x >= y


class Contains(GenericBinaryOperator):
  """Whether the right operand is contained in the value."""

  def Operation(self, x, y):
    """Whether y is contained in x.

    When both values are strings the check is case insensitive.

    Args:
      x: the expanded value, a string or another container.
      y: the right operand.

    Returns:
      True if y is contained in x, False otherwise.

    Raises:
      TypeError: if x does not support the containment check for y, e.g. a
          string value and a numeric right operand. Operate() treats this
          as "no match".
    """
    # isinstance also covers subclasses of the string types. Lowercasing is
    # only attempted when the right operand is a string as well, otherwise
    # y.lower() raises an AttributeError that Operate() does not handle.
    if isinstance(x, basestring) and isinstance(y, basestring):
      return y.lower() in x.lower()

    return y in x


class InSet(GenericBinaryOperator):
  # TODO(user): Change to an N-ary Operator?
  """Whether the value, or all of its items, is within the right operand."""

  def Operation(self, x, y):
    """Whether x is fully contained in y.

    Args:
      x: the expanded value, either a single value or an iterable of values.
      y: the right operand, the container to look into.

    Returns:
      True if x itself is in y, or if x is a non-string iterable and every
      item of x is in y. False otherwise.
    """
    if x in y:
      return True

    # Strings are iterable too, but checking them character by character
    # makes no sense.
    if isinstance(x, (basestring, bytes)):
      return False

    try:
      return all(item in y for item in x)
    except TypeError:
      # x is not iterable, or an item cannot be looked up in y.
      return False


class Regexp(GenericBinaryOperator):
  """Whether the value matches the regexp in the right operand.

  Attributes:
    compiled_re: the right operand compiled with the re.DOTALL flag.
  """

  def __init__(self, *children, **kwargs):
    """Constructor.

    Raises:
      ValueError: if the right operand is not a valid regular expression.
    """
    super(Regexp, self).__init__(*children, **kwargs)
    # Note that right_operand is not necessarily a string.
    pattern = self.right_operand
    logging.debug(u'Compiled: {0!s}'.format(pattern))
    try:
      self.compiled_re = re.compile(utils.GetUnicodeString(pattern), re.DOTALL)
    except re.error:
      raise ValueError(
          u'Regular expression "{0!s}" is malformed.'.format(pattern))

  def Operation(self, x, unused_y):
    """Whether the compiled expression is found anywhere in x."""
    try:
      return bool(self.compiled_re.search(utils.GetUnicodeString(x)))
    except TypeError:
      return False


class RegexpInsensitive(Regexp):
  """Whether the value matches the regexp, ignoring case.

  The parent constructor compiles the expression first; it is then compiled
  again here with the re.I flag added.
  """

  def __init__(self, *children, **kwargs):
    """Constructor.

    Raises:
      ValueError: if the right operand is not a valid regular expression.
    """
    super(RegexpInsensitive, self).__init__(*children, **kwargs)
    # Note that right_operand is not necessarily a string.
    pattern = self.right_operand
    logging.debug(u'Compiled: {0!s}'.format(pattern))
    flags = re.I | re.DOTALL
    try:
      self.compiled_re = re.compile(utils.GetUnicodeString(pattern), flags)
    except re.error:
      raise ValueError(
          u'Regular expression "{0!s}" is malformed.'.format(pattern))


class Context(Operator):
  """Restricts the child operators to a specific context within the object.

  Solves the context problem. The context problem is the following:
  Suppose you store a list of loaded DLLs within a process. Suppose that for
  each of these DLLs you store the number of imported functions and each of the
  imported functions name.

  Imagine that a malicious DLL is injected into processes and its indicators are
  that it only imports one function and that it is RegQueryValueEx. You'd write
  your indicator like this:


  AndOperator(
    Equal("ImportedDLLs.ImpFunctions.Name", "RegQueryValueEx"),
    Equal("ImportedDLLs.NumImpFunctions", "1")
    )

  Now imagine you have these two processes on a given system.

  Process1
  +[0]__ImportedDlls
        +[0]__Name: "notevil.dll"
        |[0]__ImpFunctions
        |     +[1]__Name: "CreateFileA"
        |[0]__NumImpFunctions: 1
        |
        +[1]__Name: "alsonotevil.dll"
        |[1]__ImpFunctions
        |     +[0]__Name: "RegQueryValueEx"
        |     +[1]__Name: "CreateFileA"
        |[1]__NumImpFunctions: 2

  Process2
  +[0]__ImportedDlls
        +[0]__Name: "evil.dll"
        |[0]__ImpFunctions
        |     +[0]__Name: "RegQueryValueEx"
        |[0]__NumImpFunctions: 1

  Both Process1 and Process2 match your query, as each of the indicators is
  evaluated separately. While you wanted to express "find me processes that
  have a DLL that has both one imported function and RegQueryValueEx is in the
  list of imported functions", your indicator actually means "find processes
  that have at least a DLL with 1 imported function and at least one DLL that
  imports the RegQueryValueEx function".

  To write such an indicator you need to specify a context of ImportedDLLs for
  these two clauses. Such that you convert your indicator to:

  Context("ImportedDLLs",
          AndOperator(
            Equal("ImpFunctions.Name", "RegQueryValueEx"),
            Equal("NumImpFunctions", "1")
          ))

  Context will execute the filter specified as the second parameter for each of
  the objects under "ImportedDLLs", thus applying the condition per DLL, not per
  object and returning the right result.

  Attributes:
    context: the first argument, the path that selects the sub objects.
    condition: the second argument, the filter applied to each sub object.
  """

  def __init__(self, arguments=None, **kwargs):
    """Constructor.

    Args:
      arguments: a list of two items, the context path and the condition.
      kwargs: passed through to the parent constructor.

    Raises:
      InvalidNumberOfOperands: if arguments is missing or does not hold
          exactly two items.
    """
    # arguments defaults to None, for which len() raises a TypeError; report
    # the wrong number of operands instead.
    if arguments is None or len(arguments) != 2:
      raise InvalidNumberOfOperands(u'Context accepts only 2 operands.')
    super(Context, self).__init__(arguments=arguments, **kwargs)
    self.context, self.condition = self.args

  def Matches(self, obj):
    """Whether the condition matches any sub object found under the context.

    Every value expanded for the context path is iterated and the condition
    is applied to each of its items.
    """
    for object_list in self.value_expander.Expand(obj, self.context):
      for sub_object in object_list:
        if self.condition.Matches(sub_object):
          return True
    return False


# Maps the operator strings of the query language to the operator classes
# that implement them. BasicExpression.Compile lowercases the operator before
# the lookup, via the OPS attribute of the filter implementation.
OP2FN = {
    'equals': Equals,
    'is': Equals,
    '==': Equals,
    '!=': NotEquals,
    'contains': Contains,
    '>': Greater,
    '>=': GreaterEqual,
    '<': Less,
    '<=': LessEqual,
    'inset': InSet,
    'regexp': Regexp,
    'iregexp': RegexpInsensitive}


class ValueExpander(object):
  """Encapsulates the logic to expand values available in an object.

  Expand() walks a field path through an object and yields every value found
  at the end of that path. Subclasses define how a single path element is
  looked up by implementing _GetValue.
  """

  FIELD_SEPARATOR = '.'

  def _GetAttributeName(self, path):
    """Returns the attribute name to fetch given a path."""
    return path[0]

  def _GetValue(self, unused_obj, unused_attr_name):
    """Returns the value of the attribute attr_name."""
    raise NotImplementedError()

  def _AtLeaf(self, attr_value):
    """Called when at a leaf value. Should yield a value."""
    yield attr_value

  def _AtNonLeaf(self, attr_value, path):
    """Called when at a non-leaf value. Should recurse and yield values."""
    try:
      if isinstance(attr_value, dict):
        # Dictionaries are yielded as they are.
        yield attr_value
      else:
        # Any other iterable: expand the rest of the path on every item.
        for item in attr_value:
          for found in self.Expand(item, path[1:]):
            yield found
    except TypeError:
      # Not an iterable: expand the rest of the path on the value itself.
      for found in self.Expand(attr_value, path[1:]):
        yield found

  def Expand(self, obj, path):
    """Yields all the values for the given path in the object obj.

    Given a path such as ["sub1", "sub2"] it yields all the values available
    in obj.sub1.sub2. sub1 and sub2 must be data attributes or properties.

    If sub1 returns a list of objects, or a generator, Expand aggregates the
    values for the remaining path for each of the objects, thus yielding all
    the values under the given path for the input object.

    Args:
      obj: An object that will be traversed for the given path
      path: A list of strings, or a single string in which the elements are
      separated by FIELD_SEPARATOR.

    Yields:
      The values once the object is traversed. Nothing is yielded when the
      first element of the path resolves to None.
    """
    if isinstance(path, basestring):
      path = path.split(self.FIELD_SEPARATOR)

    attr_value = self._GetValue(obj, self._GetAttributeName(path))
    if attr_value is None:
      return

    if len(path) == 1:
      results = self._AtLeaf(attr_value)
    else:
      results = self._AtNonLeaf(attr_value, path)

    for result in results:
      yield result


class AttributeValueExpander(ValueExpander):
  """An expander that gives values based on object attribute names."""

  def _GetValue(self, obj, attr_name):
    """Returns the attribute attr_name of obj, or None if it is missing."""
    return getattr(obj, attr_name, None)


class LowercaseAttributeValueExpander(AttributeValueExpander):
  """An expander that lowercases all attribute names before access."""

  def _GetAttributeName(self, path):
    """Returns the first element of the path in lowercase."""
    first_element = path[0]
    return first_element.lower()


class DictValueExpander(ValueExpander):
  """An expander that gets values from dictionary access to the object."""

  def _GetValue(self, obj, attr_name):
    """Returns obj.get(attr_name), which is None when the key is missing."""
    return obj.get(attr_name, None)


class BasicExpression(lexer.Expression):
  """Expression of the form: attribute operator argument(s).

  Attributes:
    bool_value: False when the expression was negated, True otherwise.
  """

  def __init__(self):
    """Initializes the expression with positive matching."""
    super(BasicExpression, self).__init__()
    self.bool_value = True

  def FlipBool(self):
    """Toggles between positive and negative matching."""
    self.bool_value = not self.bool_value

  def Compile(self, filter_implementation):
    """Compiles the expression into an operator object.

    Args:
      filter_implementation: the filter implementation, which provides the
          operator classes (OPS) and the value expander (FILTERS).

    Returns:
      The operator object, negated when the expression was negated and the
      operator supports FlipBool.

    Raises:
      ParseError: if the operator is not known to the implementation.
    """
    arguments = [self.attribute]
    operator_cls = filter_implementation.OPS.get(self.operator.lower(), None)
    if not operator_cls:
      raise ParseError(u'Unknown operator {0:s} provided.'.format(
          self.operator))

    arguments.extend(self.args)
    compiled = operator_cls(
        arguments=arguments,
        value_expander=filter_implementation.FILTERS['ValueExpander'])

    if not self.bool_value and hasattr(compiled, 'FlipBool'):
      compiled.FlipBool()

    return compiled


class ContextExpression(lexer.Expression):
  """Represents the context operator.

  Attributes:
    attribute: the context path.
    args: a list holding the expression evaluated within the context.
  """

  def __init__(self, attribute="", part=None):
    """Constructor.

    Args:
      attribute: the context path.
      part: optional expression to evaluate within the context.
    """
    self.attribute = attribute
    self.args = []
    if part:
      self.args.append(part)
    # NOTE(review): the parent constructor runs after args is filled; if
    # lexer.Expression.__init__ resets args, "part" is lost - confirm.
    super(ContextExpression, self).__init__()

  def __str__(self):
    """Returns a readable representation of the context and its arguments."""
    # The second value is a list, not a string, hence "!s".
    return 'Context({0!s} {1!s})'.format(
        self.attribute, [str(x) for x in self.args])

  def SetExpression(self, expression):
    """Set the expression.

    Args:
      expression: the lexer.Expression to evaluate within the context.

    Raises:
      ParseError: if expression is not a lexer.Expression.
    """
    if isinstance(expression, lexer.Expression):
      self.args = [expression]
    else:
      # The rejected value can be of any type, so it is formatted with "!s";
      # "s" fails for e.g. integers and would hide the ParseError.
      raise ParseError(u'Expected expression, got {0!s}.'.format(expression))

  def Compile(self, filter_implementation):
    """Compile the expression.

    Args:
      filter_implementation: the filter implementation, which provides the
          Context class and the value expander (FILTERS).

    Returns:
      The context filter object, built from the context path followed by
      the compiled arguments.
    """
    arguments = [self.attribute]
    for arg in self.args:
      arguments.append(arg.Compile(filter_implementation))
    expander = filter_implementation.FILTERS['ValueExpander']
    context_cls = filter_implementation.FILTERS['Context']
    return context_cls(arguments=arguments,
                       value_expander=expander)


class BinaryExpression(lexer.BinaryExpression):
  """Expression that joins its operands with a boolean AND or OR."""

  def Compile(self, filter_implementation):
    """Compile the binary expression into a filter object.

    Args:
      filter_implementation: the filter implementation, which provides the
          AndFilter and OrFilter classes (FILTERS).

    Returns:
      The filter object, built from the compiled operands.

    Raises:
      ParseError: if the operator is neither an AND nor an OR.
    """
    operator = self.operator.lower()
    if operator in ('and', '&&'):
      method = 'AndFilter'
    elif operator in ('or', '||'):
      method = 'OrFilter'
    else:
      raise ParseError(u'Invalid binary operator {0:s}.'.format(operator))

    compiled_operands = [
        operand.Compile(filter_implementation) for operand in self.args]
    filter_cls = filter_implementation.FILTERS[method]
    return filter_cls(arguments=compiled_operands)


class Parser(lexer.SearchParser):
  """Parses and generates an AST for a query written in the described language.

  Examples of valid syntax:
    size is 40
    (name contains "Program Files" AND hash.md5 is "123abc")
    @imported_modules (num_symbols == 14 AND symbol.name is "FindWindow")

  Attributes:
    flipped: whether the keyword "not" has been seen for the expression that
        is currently being parsed. Only set once an attribute was stored.
  """
  expression_cls = BasicExpression
  binary_expression_cls = BinaryExpression
  context_cls = ContextExpression

  # Every token is defined by: the state in which it applies, the regular
  # expression to match, the name(s) of the callback(s) to invoke and the
  # next state. The order of the tokens matters and must be kept.
  tokens = [
      # Operators and related tokens
      lexer.Token('INITIAL', r'\@[\w._0-9]+',
                  'ContextOperator,PushState', 'CONTEXTOPEN'),
      lexer.Token('INITIAL', r'[^\s\(\)]', 'PushState,PushBack', 'ATTRIBUTE'),
      lexer.Token('INITIAL', r'\(', 'PushState,BracketOpen', None),
      lexer.Token('INITIAL', r'\)', 'BracketClose', 'BINARY'),

      # Context
      lexer.Token('CONTEXTOPEN', r'\(', 'BracketOpen', 'INITIAL'),

      # Double quoted string
      lexer.Token('STRING', '"', 'PopState,StringFinish', None),
      lexer.Token('STRING', r'\\x(..)', 'HexEscape', None),
      lexer.Token('STRING', r'\\(.)', 'StringEscape', None),
      lexer.Token('STRING', r'[^\\"]+', 'StringInsert', None),

      # Single quoted string
      lexer.Token('SQ_STRING', '\'', 'PopState,StringFinish', None),
      lexer.Token('SQ_STRING', r'\\x(..)', 'HexEscape', None),
      lexer.Token('SQ_STRING', r'\\(.)', 'StringEscape', None),
      lexer.Token('SQ_STRING', r'[^\\\']+', 'StringInsert', None),

      # Basic expression
      lexer.Token('ATTRIBUTE', r'[\w._0-9]+', 'StoreAttribute', 'OPERATOR'),
      lexer.Token('OPERATOR', r'not ', 'FlipLogic', None),
      lexer.Token('OPERATOR', r'(\w+|[<>!=]=?)', 'StoreOperator', 'CHECKNOT'),
      lexer.Token('CHECKNOT', r'not', 'FlipLogic', 'ARG'),
      lexer.Token('CHECKNOT', r'\s+', None, None),
      lexer.Token('CHECKNOT', r'([^not])', 'PushBack', 'ARG'),
      lexer.Token('ARG', r'(\d+\.\d+)', 'InsertFloatArg', 'ARG'),
      # Base16 integers need the hexadecimal digits a-f too, not only 0-9.
      lexer.Token('ARG', r'(0x[0-9a-fA-F]+)', 'InsertInt16Arg', 'ARG'),
      lexer.Token('ARG', r'(\d+)', 'InsertIntArg', 'ARG'),
      lexer.Token('ARG', '"', 'PushState,StringStart', 'STRING'),
      lexer.Token('ARG', '\'', 'PushState,StringStart', 'SQ_STRING'),
      # When the last parameter from arg_list has been pushed

      # State where binary operators are supported (AND, OR)
      lexer.Token('BINARY', r'(?i)(and|or|\&\&|\|\|)',
                  'BinaryOperator', 'INITIAL'),
      # - We can also skip spaces
      lexer.Token('BINARY', r'\s+', None, None),
      # - But if it's not "and" or just spaces we have to go back
      lexer.Token('BINARY', '.', 'PushBack,PopState', None),

      # Skip whitespace.
      lexer.Token('.', r'\s+', None, None),
      ]

  def StoreAttribute(self, string='', **kwargs):
    """Stores the attribute and resets the "not" bookkeeping."""
    self.flipped = False
    super(Parser, self).StoreAttribute(string, **kwargs)

  def FlipAllowed(self):
    """Raise an error if the not keyword is used where it is not allowed.

    Raises:
      ParseError: if no attribute was stored yet, or if the keyword was used
          with an operator other than is, contains, inset or equals.
    """
    if not hasattr(self, 'flipped'):
      raise ParseError(u'Not defined.')

    if not self.flipped:
      return

    if self.current_expression.operator:
      if self.current_expression.operator.lower() not in (
          'is', 'contains', 'inset', 'equals'):
        raise ParseError(
            u'Keyword \'not\' does not work against operator: {0:s}'.format(
                self.current_expression.operator))

  def FlipLogic(self, **unused_kwargs):
    """Flip the boolean logic of the expression.

    If an expression is configured to return True when the condition
    is met this logic will flip that to False, and vice versa.

    Raises:
      ParseError: if the keyword is used more than once, after an argument
          or with an operator that does not support it.
    """
    if hasattr(self, 'flipped') and self.flipped:
      raise ParseError(u'The operator \'not\' can only be expressed once.')

    if self.current_expression.args:
      raise ParseError(
          u'Unable to place the keyword \'not\' after an argument.')

    self.flipped = True

    # Check if this flip operation should be allowed.
    self.FlipAllowed()

    if hasattr(self.current_expression, 'FlipBool'):
      self.current_expression.FlipBool()
      logging.debug(u'Negative matching [flipping boolean logic].')
    else:
      logging.warning(
          u'Unable to perform a negative match, issuing a positive one.')

  def InsertArg(self, string='', **unused_kwargs):
    """Insert an arg to the current expression.

    Args:
      string: the argument, which is not necessarily of type string.

    Returns:
      The name of the next state (BINARY) if the expression is complete,
      None otherwise.
    """
    # Note that "string" is not necessarily of type string.
    logging.debug(u'Storing argument: {0!s}'.format(string))

    # Check if this flip operation should be allowed.
    self.FlipAllowed()

    # This expression is complete
    if self.current_expression.AddArg(string):
      self.stack.append(self.current_expression)
      self.current_expression = self.expression_cls()
      # We go to the BINARY state, to find if there's an AND or OR operator
      return 'BINARY'

  def InsertFloatArg(self, string='', **unused_kwargs):
    """Inserts a Float argument.

    Raises:
      ParseError: if string cannot be converted into a float.
    """
    try:
      float_value = float(string)
    except (TypeError, ValueError):
      raise ParseError(u'{0:s} is not a valid float.'.format(string))
    return self.InsertArg(float_value)

  def InsertIntArg(self, string='', **unused_kwargs):
    """Inserts an Integer argument.

    Raises:
      ParseError: if string cannot be converted into an integer.
    """
    try:
      int_value = int(string)
    except (TypeError, ValueError):
      raise ParseError(u'{0:s} is not a valid integer.'.format(string))
    return self.InsertArg(int_value)

  def InsertInt16Arg(self, string='', **unused_kwargs):
    """Inserts an Integer in base16 argument.

    Raises:
      ParseError: if string cannot be converted into a base16 integer.
    """
    try:
      int_value = int(string, 16)
    except (TypeError, ValueError):
      raise ParseError(u'{0:s} is not a valid base16 integer.'.format(string))
    return self.InsertArg(int_value)

  def StringFinish(self, **unused_kwargs):
    """Stores a finished quoted string as an attribute or as an argument.

    Which of the two depends on the current state of the parser; nothing is
    stored (and None is returned) in any other state.
    """
    if self.state == 'ATTRIBUTE':
      return self.StoreAttribute(string=self.string)

    elif self.state == 'ARG':
      return self.InsertArg(string=self.string)

  def StringEscape(self, string, match, **unused_kwargs):
    """Escape backslashes found inside a string quote.

    Backslashes followed by anything other than [\'"rnbt.ws] will raise
    an Error.

    Args:
      string: The string that matched.
      match: The match object (m.group(1) is the escaped code)

    Raises:
      ParseError: When the escaped string is not one of [\'"rnbt.ws]
    """
    if match.group(1) in '\\\'"rnbt\\.ws':
      self.string += string.decode('string_escape')
    else:
      raise ParseError(u'Invalid escape character {0:s}.'.format(string))

  def HexEscape(self, string, match, **unused_kwargs):
    """Converts a hex escaped string.

    Args:
      string: The string that matched.
      match: The match object (m.group(1) holds the two hex characters)

    Raises:
      ParseError: if the characters cannot be converted.
    """
    logging.debug(u'HexEscape matched {0:s}.'.format(string))
    hex_string = match.group(1)
    try:
      self.string += binascii.unhexlify(hex_string)
    except TypeError:
      raise ParseError(u'Invalid hex escape {0:s}.'.format(string))

  def ContextOperator(self, string='', **unused_kwargs):
    """Pushes a context expression, named by string without its first char."""
    self.stack.append(self.context_cls(string[1:]))

  def Reduce(self):
    """Reduce the token stack into an AST.

    Returns:
      The single expression that remains on the stack.

    Raises:
      ParseError: via Error(), if the query ended prematurely or if the
          stack cannot be reduced to a single expression.
    """
    # Check for sanity
    if self.state != 'INITIAL' and self.state != 'BINARY':
      self.Error(u'Premature end of expression')

    length = len(self.stack)
    while length > 1:
      # Precedence order
      self._CombineParenthesis()
      self._CombineBinaryExpressions('and')
      self._CombineBinaryExpressions('or')
      self._CombineContext()

      # No change
      if len(self.stack) == length:
        break
      length = len(self.stack)

    if length != 1:
      self.Error(u'Illegal query expression.')

    return self.stack[0]

  def Error(self, message=None, _=None):
    """Raises a ParseError that includes the position within the query.

    Raises:
      ParseError: always.
    """
    # Note that none of the values necessarily are strings.
    raise ParseError(u'{0!s} in position {1!s}: {2!s} <----> {3!s} )'.format(
        message, len(self.processed_buffer), self.processed_buffer,
        self.buffer))

  def _CombineBinaryExpressions(self, operator):
    """Merges the given binary operator with its neighbouring expressions.

    Args:
      operator: the binary operator to combine, compared case insensitively.
    """
    for i in range(1, len(self.stack)-1):
      item = self.stack[i]
      if (isinstance(item, lexer.BinaryExpression) and
          item.operator.lower() == operator.lower() and
          isinstance(self.stack[i-1], lexer.Expression) and
          isinstance(self.stack[i+1], lexer.Expression)):
        lhs = self.stack[i-1]
        rhs = self.stack[i+1]

        self.stack[i].AddOperands(lhs, rhs)
        # Consumed operands are blanked out and removed after the loop, so
        # that the indexes remain valid while iterating.
        self.stack[i-1] = None
        self.stack[i+1] = None

    # A list is required here since Reduce() takes the length of the stack.
    self.stack = [entry for entry in self.stack if entry]

  def _CombineContext(self):
    """Attaches expressions to the context expression that precedes them."""
    # Context can merge from item 0
    for i in range(len(self.stack)-1, 0, -1):
      item = self.stack[i-1]
      if (isinstance(item, ContextExpression) and
          isinstance(self.stack[i], lexer.Expression)):
        expression = self.stack[i]
        self.stack[i-1].SetExpression(expression)
        self.stack[i] = None

    # A list is required here since Reduce() takes the length of the stack.
    self.stack = [entry for entry in self.stack if entry]


### FILTER IMPLEMENTATIONS
class BaseFilterImplementation(object):
  """Defines the base implementation of an object filter by its attributes.

  Inherit from this class, switch any of the needed operators and pass it to
  the Compile method of a parsed string to obtain an executable filter.
  """

  # Operator string to operator class, used when compiling basic expressions.
  OPS = OP2FN
  # The building blocks looked up by name by the Compile methods of the
  # expression classes.
  FILTERS = {
      'ValueExpander': AttributeValueExpander,
      'AndFilter': AndFilter,
      'OrFilter': OrFilter,
      'IdentityFilter': IdentityFilter,
      'Context': Context}


class LowercaseAttributeFilterImplementation(BaseFilterImplementation):
  """Does field name access on the lowercase version of names.

  Useful to only access attributes and properties with Google's python naming
  style.
  """

  # A copy of the base building blocks in which only the expander differs.
  FILTERS = dict(
      BaseFilterImplementation.FILTERS,
      ValueExpander=LowercaseAttributeValueExpander)


class DictFilterImplementation(BaseFilterImplementation):
  """Does value fetching by dictionary access on the object."""

  # A copy of the base building blocks in which only the expander differs.
  FILTERS = dict(
      BaseFilterImplementation.FILTERS,
      ValueExpander=DictValueExpander)