1100 lines
		
	
	
		
			36 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1100 lines
		
	
	
		
			36 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/python
 | 
						|
# -*- coding: utf-8 -*-
 | 
						|
#
 | 
						|
# Copyright 2012 The Plaso Project Authors.
 | 
						|
# Please see the AUTHORS file for details on individual authors.
 | 
						|
#
 | 
						|
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
# you may not use this file except in compliance with the License.
 | 
						|
# You may obtain a copy of the License at
 | 
						|
#
 | 
						|
#    http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
#
 | 
						|
# Unless required by applicable law or agreed to in writing, software
 | 
						|
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
# See the License for the specific language governing permissions and
 | 
						|
# limitations under the License.
 | 
						|
"""This file contains a class to provide a parsing framework to plaso.
 | 
						|
 | 
						|
This module contains a base framework class for parsing file objects, and
also some implementations that extend it to provide a more comprehensive
parser.
 | 
						|
"""
 | 
						|
 | 
						|
import abc
 | 
						|
import csv
 | 
						|
import logging
 | 
						|
import os
 | 
						|
 | 
						|
from dfvfs.helpers import text_file
 | 
						|
import pyparsing
 | 
						|
 | 
						|
from plaso.events import text_events
 | 
						|
from plaso.lib import errors
 | 
						|
from plaso.lib import event
 | 
						|
from plaso.lib import lexer
 | 
						|
from plaso.lib import timelib
 | 
						|
from plaso.lib import utils
 | 
						|
from plaso.parsers import interface
 | 
						|
 | 
						|
import pytz
 | 
						|
 | 
						|
# Pylint complains about some functions not being implemented that shouldn't
 | 
						|
# be since they need to be implemented by children.
 | 
						|
# pylint: disable=abstract-method
 | 
						|
 | 
						|
 | 
						|
class SlowLexicalTextParser(interface.BaseParser, lexer.SelfFeederMixIn):
  """Generic text based parser that uses lexer to assist with parsing.

  This text parser is based on a rather slow lexer, which makes the
  use of this interface highly discouraged. Parsers that already
  implement it will most likely all be rewritten to support faster
  text parsing implementations.

  This text based parser needs to be extended to provide an accurate
  list of tokens that define the structure of the log file that the
  parser is designed for.
  """

  # Define the max number of lines before we determine this is
  # not the correct parser.
  MAX_LINES = 15

  # List of tokens that describe the structure of the log file.
  tokens = [
      lexer.Token('INITIAL', '(.+)\n', 'ParseString', ''),
      ]

  def __init__(self, local_zone=True):
    """Constructor for the SlowLexicalTextParser.

    Args:
      local_zone: A boolean value that determines if the entries
                  in the log file are stored in the local time
                  zone of the computer that stored it or in a fixed
                  timezone, like UTC.
    """
    # TODO: remove the multiple inheritance.
    lexer.SelfFeederMixIn.__init__(self)
    interface.BaseParser.__init__(self)
    self.line_ready = False
    # Keys that start with the letter 'i' hold integers, all other keys
    # hold strings, see ClearValues.
    self.attributes = {
        'body': '',
        'iyear': 0,
        'imonth': 0,
        'iday': 0,
        'time': '',
        'hostname': '',
        'username': '',
    }
    self.local_zone = local_zone
    self.file_entry = None

  def ClearValues(self):
    """Clears all the values inside the attributes dict.

    All values that start with the letter 'i' are considered
    to be an integer, otherwise string value is assumed.
    """
    self.line_ready = False
    for attr in self.attributes:
      if attr[0] == 'i':
        self.attributes[attr] = 0
      else:
        self.attributes[attr] = ''

  def ParseIncomplete(self, match=None, **unused_kwargs):
    """Indication that we've got a partial line to match against.

    The complete match is appended to the body attribute and the line is
    flagged as ready to be parsed.

    Args:
      match: The regular expression match object.
    """
    self.attributes['body'] += match.group(0)
    self.line_ready = True

  def ParseMessage(self, **unused_kwargs):
    """Signal that a line is ready to be parsed."""
    self.line_ready = True

  def SetMonth(self, match=None, **unused_kwargs):
    """Parses the month.

       This is a callback function for the text parser (lexer) and is
       called by the corresponding lexer state.

       The lower case value of the first match group is looked up in
       timelib.MONTH_DICT, a value that is not found defaults to 1.

    Args:
      match: The regular expression match object.
    """
    self.attributes['imonth'] = int(
        timelib.MONTH_DICT.get(match.group(1).lower(), 1))

  def SetDay(self, match=None, **unused_kwargs):
    """Parses the day of the month.

       This is a callback function for the text parser (lexer) and is
       called by the corresponding lexer state.

    Args:
      match: The regular expression match object.
    """
    self.attributes['iday'] = int(match.group(1))

  def SetTime(self, match=None, **unused_kwargs):
    """Set the time attribute.

    Args:
      match: The regular expression match object.
    """
    self.attributes['time'] = match.group(1)

  def SetYear(self, match=None, **unused_kwargs):
    """Parses the year.

       This is a callback function for the text parser (lexer) and is
       called by the corresponding lexer state.

    Args:
      match: The regular expression match object.
    """
    self.attributes['iyear'] = int(match.group(1))

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extract data from a text file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    path_spec_printable = u'{0:s}:{1:s}'.format(
        file_entry.path_spec.type_indicator, file_entry.name)
    file_object = file_entry.GetFileObject()

    self.file_entry = file_entry
    # TODO: this is necessary since we inherit from lexer.SelfFeederMixIn.
    self.file_object = file_object

    # The file object is closed on every exit path, including the early
    # "not a text file" rejection and unexpected exceptions.
    try:
      self._ParseTextFileObject(
          parser_context, file_entry, file_object, path_spec_printable,
          parser_chain)
    finally:
      file_object.close()

  def _ParseTextFileObject(
      self, parser_context, file_entry, file_object, path_spec_printable,
      parser_chain):
    """Verifies and parses the content of an opened text file.

    The caller is responsible for closing the file object.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: The file-like object of the file entry.
      path_spec_printable: A printable string that identifies the file,
                           used in log and error messages.
      parser_chain: String containing the parsing chain up to this point
                    or None.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    # Start by checking, is this a text file or not? Before we proceed
    # any further.
    file_object.seek(0, os.SEEK_SET)
    if not utils.IsText(file_object.read(40)):
      raise errors.UnableToParseFile(u'Not a text file, unable to proceed.')

    file_object.seek(0, os.SEEK_SET)

    error_count = 0
    file_verified = False
    # We need to clear out few values in the Lexer before continuing.
    # There might be some leftovers from previous run.
    # NOTE(review): next_entry_offset is not reset here, so the offset of
    # the first entry could stem from a previously parsed file - confirm.
    self.error = 0
    self.buffer = ''

    # Add ourselves to the parser chain, which will be used in all subsequent
    # event creation in this parser.
    parser_chain = self._BuildParserChain(parser_chain)

    while True:
      _ = self.NextToken()

      if self.state == 'INITIAL':
        # Track where the current entry started: the read position minus
        # what is still waiting in the buffer.
        self.entry_offset = getattr(self, 'next_entry_offset', 0)
        self.next_entry_offset = file_object.tell() - len(self.buffer)

      if not file_verified and self.error >= self.MAX_LINES * 2:
        logging.debug(
            u'Lexer error count: {0:d} and current state {1:s}'.format(
                self.error, self.state))
        raise errors.UnableToParseFile(
            u'[{0:s}] unsupported file: {1:s}.'.format(
                self.NAME, path_spec_printable))

      if self.line_ready:
        try:
          event_object = self.ParseLine(parser_context)
          parser_context.ProduceEvent(
              event_object, parser_chain=parser_chain, file_entry=file_entry)

          # A single successfully parsed line verifies the file.
          file_verified = True

        except errors.TimestampNotCorrectlyFormed as exception:
          error_count += 1
          if file_verified:
            logging.debug(
                u'[{0:s} VERIFIED] Error count: {1:d} and ERROR: {2:d}'.format(
                    path_spec_printable, error_count, self.error))
            logging.warning(
                u'[{0:s}] Unable to parse timestamp with error: {1:s}'.format(
                    self.NAME, exception))

          else:
            logging.debug((
                u'[{0:s} EVALUATING] Error count: {1:d} and ERROR: '
                u'{2:d})').format(path_spec_printable, error_count, self.error))

            if error_count >= self.MAX_LINES:
              raise errors.UnableToParseFile(
                  u'[{0:s}] unsupported file: {1:s}.'.format(
                      self.NAME, path_spec_printable))

        finally:
          self.ClearValues()

      if self.Empty():
        # Try to fill the buffer to prevent the parser from ending prematurely.
        self.Feed()

      if self.Empty():
        break

    if not file_verified:
      raise errors.UnableToParseFile(
          u'[{0:s}] unable to parser file: {1:s}.'.format(
              self.NAME, path_spec_printable))

    file_offset = file_object.get_offset()
    if file_offset < file_object.get_size():
      logging.error((
          u'{0:s} prematurely terminated parsing: {1:s} at offset: '
          u'0x{2:08x}.').format(
              self.NAME, path_spec_printable, file_offset))

  def ParseString(self, match=None, **unused_kwargs):
    """Appends the matched string to the body attribute.

    The first match group is used, or the complete match if the regular
    expression does not define a group. Newline characters are stripped
    from both ends.

    Args:
      match: The regular expression match object.
    """
    try:
      self.attributes['body'] += match.group(1).strip('\n')
    except IndexError:
      self.attributes['body'] += match.group(0).strip('\n')

  def PrintLine(self):
    """Return a string with combined values from the lexer.

    Returns:
      A string that combines the date, time, hostname, reporter and body
      values stored in the attributes dict, where a placeholder is used
      for every value that is not set.
    """
    # The values are stored as items of the attributes dict, hence they
    # need to be read with dict access and not with getattr. Values that
    # are not set are stored as 0 or an empty string, see ClearValues.
    year = self.attributes.get('iyear', None)
    month = self.attributes.get('imonth', None)
    day = self.attributes.get('iday', None)

    if not year or not month or not day:
      date_string = u'[DATE NOT SET]'
    else:
      try:
        # int() without a base accepts both integers and numeric strings.
        date_string = u'{0:04d}-{1:02d}-{2:02d}'.format(
            int(year), int(month), int(day))
      except (TypeError, ValueError):
        date_string = u'[DATE INVALID]'

    time_string = self.attributes.get('time', None) or u'[TIME NOT SET]'
    hostname_string = (
        self.attributes.get('hostname', None) or u'HOSTNAME NOT SET')
    reporter_string = (
        self.attributes.get('reporter', None) or u'[REPORTER NOT SET]')
    body_string = self.attributes.get('body', None) or u'[BODY NOT SET]'

    # TODO: this is a work in progress. The reason for the try-catch is that
    # the text parser is handed a non-text file and must deal with converting
    # arbitrary binary data.
    try:
      line = u'{0:s} {1:s} [{2:s}] {3:s} => {4:s}'.format(
          date_string, time_string, hostname_string, reporter_string,
          body_string)
    except UnicodeError:
      line = 'Unable to print line - due to encoding error.'

    return line

  def ParseLine(self, parser_context):
    """Return an event object extracted from the current line.

    Args:
      parser_context: A parser context object (instance of ParserContext).

    Returns:
      An event object (instance of TextEvent).

    Raises:
      TimestampNotCorrectlyFormed: when the time or year is not set or when
                                   the timestamp cannot be determined.
    """
    if not self.attributes['time']:
      raise errors.TimestampNotCorrectlyFormed(
          u'Unable to parse timestamp, time not set.')

    if not self.attributes['iyear']:
      raise errors.TimestampNotCorrectlyFormed(
          u'Unable to parse timestamp, year not set.')

    times = self.attributes['time'].split(':')
    if self.local_zone:
      timezone = parser_context.timezone
    else:
      timezone = pytz.UTC

    if len(times) < 3:
      raise errors.TimestampNotCorrectlyFormed((
          u'Unable to parse timestamp, not of the format HH:MM:SS '
          u'[{0:s}]').format(self.PrintLine()))
    try:
      # The seconds can have a fractional part, e.g. "12.345".
      # NOTE(review): the fraction is passed as-is as the number of
      # microseconds, so ".5" becomes 5 and not 500000 - confirm that this
      # matches the supported log formats.
      secs = times[2].split('.')
      if len(secs) == 2:
        sec, us = secs
      else:
        sec = times[2]
        us = 0

      timestamp = timelib.Timestamp.FromTimeParts(
          int(self.attributes['iyear']), self.attributes['imonth'],
          self.attributes['iday'], int(times[0]), int(times[1]),
          int(sec), microseconds=int(us), timezone=timezone)

    except ValueError as exception:
      raise errors.TimestampNotCorrectlyFormed(
          u'Unable to parse: {0:s} with error: {1:s}'.format(
              self.PrintLine(), exception))

    return self.CreateEvent(
        timestamp, getattr(self, 'entry_offset', 0), self.attributes)

  # TODO: this is a rough initial implementation to get this working.
  def CreateEvent(self, timestamp, offset, attributes):
    """Creates an event.

       This function should be overwritten by text parsers that require
       to generate specific event object type, the default is TextEvent.

    Args:
      timestamp: The timestamp time value. The timestamp contains the
                 number of microseconds since Jan 1, 1970 00:00:00 UTC.
      offset: The offset of the event.
      attributes: A dict that contains the events attributes.

    Returns:
      An event object (instance of TextEvent).
    """
    return text_events.TextEvent(timestamp, offset, attributes)
 | 
						|
 | 
						|
 | 
						|
class TextCSVParser(interface.BaseParser):
  """An implementation of a simple CSV line-per-entry log files."""

  # A list that contains the names of all the fields in the log file.
  COLUMNS = []

  # A CSV file is comma separated, but this can be overwritten to include
  # tab, pipe or other character separation.
  VALUE_SEPARATOR = ','

  # If there is a header before the lines start it can be defined here, and
  # the number of header lines that need to be skipped before the parsing
  # starts.
  NUMBER_OF_HEADER_LINES = 0

  # If there is a special quote character used inside the structured text
  # it can be defined here.
  QUOTE_CHAR = '"'

  # Value that should not appear inside the file, made to test the actual
  # file to see if it confirms to standards.
  MAGIC_TEST_STRING = 'RegnThvotturMeistarans'

  def VerifyRow(self, unused_parser_context, unused_row):
    """Return a bool indicating whether or not this is the correct parser.

    This default implementation returns None, which Parse treats as a
    failed verification.

    Args:
      unused_parser_context: A parser context object (instance of
                             ParserContext).
      unused_row: A single row from the CSV file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
    pass

  def ParseRow(
      self, parser_context, row_offset, row, file_entry=None,
      parser_chain=None):
    """Parse a line of the log file and extract event objects.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      row_offset: The offset of the row.
      row: A dictionary containing all the fields as denoted in the
           COLUMNS class list.
      file_entry: optional file entry object (instance of dfvfs.FileEntry).
                  The default is None.
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.
    """
    event_object = event.EventObject()
    if row_offset is not None:
      event_object.offset = row_offset
    event_object.row_dict = row
    parser_context.ProduceEvent(
        event_object, parser_chain=parser_chain, file_entry=file_entry)

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extract data from a CSV file.

    The first row is used to verify that the file matches the format of
    the parser, after which every row is handed to ParseRow.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    path_spec_printable = file_entry.path_spec.comparable.replace(u'\n', u';')
    file_object = file_entry.GetFileObject()

    # The file object is closed on every exit path, including errors raised
    # while the rows are being parsed.
    try:
      file_object.seek(0, os.SEEK_SET)

      text_file_object = text_file.TextFile(file_object)

      # If we specifically define a number of lines we should skip do that
      # here.
      for _ in range(0, self.NUMBER_OF_HEADER_LINES):
        _ = text_file_object.readline()

      # The magic test string is used for both missing and surplus values
      # so that a row that does not match COLUMNS can be detected.
      reader = csv.DictReader(
          text_file_object, fieldnames=self.COLUMNS,
          restkey=self.MAGIC_TEST_STRING, restval=self.MAGIC_TEST_STRING,
          delimiter=self.VALUE_SEPARATOR, quotechar=self.QUOTE_CHAR)

      try:
        row = next(reader)
      except (csv.Error, StopIteration):
        raise errors.UnableToParseFile(
            u'[{0:s}] Unable to parse CSV file: {1:s}.'.format(
                self.NAME, path_spec_printable))

      number_of_columns = len(self.COLUMNS)
      number_of_records = len(row)

      if number_of_records != number_of_columns:
        raise errors.UnableToParseFile((
            u'[{0:s}] Unable to parse CSV file: {1:s}. Wrong number of '
            u'records (expected: {2:d}, got: {3:d})').format(
                self.NAME, path_spec_printable, number_of_columns,
                number_of_records))

      for key, value in row.items():
        if key == self.MAGIC_TEST_STRING or value == self.MAGIC_TEST_STRING:
          raise errors.UnableToParseFile((
              u'[{0:s}] Unable to parse CSV file: {1:s}. Signature '
              u'mismatch.').format(self.NAME, path_spec_printable))

      if not self.VerifyRow(parser_context, row):
        raise errors.UnableToParseFile((
            u'[{0:s}] Unable to parse CSV file: {1:s}. Verification '
            u'failed.').format(self.NAME, path_spec_printable))

      # Add ourselves to the parser chain, which will be used in all
      # subsequent event creation in this parser.
      parser_chain = self._BuildParserChain(parser_chain)

      # NOTE(review): tell() is called after the row has been read, so the
      # offset presumably points past the row and not to its start - confirm.
      self.ParseRow(
          parser_context, text_file_object.tell(), row, file_entry=file_entry,
          parser_chain=parser_chain)

      for row in reader:
        self.ParseRow(
            parser_context, text_file_object.tell(), row,
            file_entry=file_entry, parser_chain=parser_chain)

    finally:
      file_object.close()
 | 
						|
 | 
						|
 | 
						|
def PyParseRangeCheck(lower_bound, upper_bound):
  """Build a pyparsing parse action that enforces a numeric range.

  pyparsing invokes parse actions with a fixed set of arguments, which
  leaves no room to pass in the bounds directly. This function therefore
  returns a callback that has the bounds bound to it and that can be
  handed to setParseAction.

  Args:
    lower_bound: An integer representing the lower bound of the range.
    upper_bound: An integer representing the upper bound of the range.

  Returns:
    A callback method that can be used by pyparsing setParseAction.
  """
  def CheckRange(unused_string, unused_location, tokens):
    """Raise a ParseException if the first token is outside of the range."""
    # An empty token list is treated as the value -1.
    try:
      value = tokens[0]
    except IndexError:
      value = -1

    if value < lower_bound:
      message = u'Value: {0:d} precedes lower bound: {1:d}'.format(
          value, lower_bound)
      raise pyparsing.ParseException(message)

    if value > upper_bound:
      message = u'Value: {0:d} exceeds upper bound: {1:d}'.format(
          value, upper_bound)
      raise pyparsing.ParseException(message)

  return CheckRange
 | 
						|
 | 
						|
 | 
						|
def PyParseIntCast(unused_string, unused_location, tokens):
  """Cast all the tokens to integers.

  This is a pyparsing callback method that converts the matched
  string into an integer.

  The method modifies the content of the tokens list and converts
  them all to an integer value. A token that cannot be converted is
  logged and set to 0.

  Args:
    unused_string: The original parsed string.
    unused_location: The location within the string where the match was made.
    tokens: A list of extracted tokens (where the string to be converted is
    stored).
  """
  # Cast the regular tokens.
  for index, token in enumerate(tokens):
    try:
      tokens[index] = int(token)
    except ValueError:
      logging.error(u'Unable to cast [{0:s}] to an int, setting to 0'.format(
          token))
      tokens[index] = 0

  # We also need to cast the dictionary built tokens.
  for key in tokens.keys():
    try:
      tokens[key] = int(tokens[key], 10)
    except ValueError:
      # The value failed the integer conversion, so it must be formatted
      # as a string; an integer format code would raise in turn.
      logging.error(
          u'Unable to cast [{0:s} = {1:s}] to an int, setting to 0'.format(
              key, tokens[key]))
      tokens[key] = 0
 | 
						|
 | 
						|
 | 
						|
def PyParseJoinList(unused_string, unused_location, tokens):
  """Join a list of tokens into a single token.

  This is a callback method for pyparsing setParseAction that modifies
  the returned token list to join all the elements in the list to a single
  token.

  Args:
    unused_string: The original parsed string.
    unused_location: The location within the string where the match was made.
    tokens: A list of extracted tokens. This is the list that should be joined
    together and stored as a single token.
  """
  join_list = []
  for token in tokens:
    try:
      join_list.append(str(token))
    except UnicodeError:
      # str() of a non-ASCII unicode token raises UnicodeEncodeError on
      # Python 2, hence the common base class of the encode and decode
      # errors is caught to make sure the fallback is used.
      join_list.append(repr(token))

  # Store the joined value as the first token and drop all the others.
  tokens[0] = u''.join(join_list)
  del tokens[1:]
 | 
						|
 | 
						|
 | 
						|
class PyparsingConstants(object):
  """Reusable pyparsing expressions that are shared between parsers."""

  # Numbers.
  INTEGER = pyparsing.Word(pyparsing.nums)
  INTEGER.setParseAction(PyParseIntCast)

  # An octet is first cast to an integer and then checked against 0-255.
  IPV4_OCTET = pyparsing.Word(pyparsing.nums, min=1, max=3)
  IPV4_OCTET.setParseAction(PyParseIntCast, PyParseRangeCheck(0, 255))

  # Four octets separated by dots, joined back into a single token.
  IPV4_ADDRESS = IPV4_OCTET + ('.' + IPV4_OCTET) * 3
  IPV4_ADDRESS.setParseAction(PyParseJoinList)

  # TODO: Fix the IPv6 address specification to be more accurate (8 :, correct
  # size, etc).
  IPV6_ADDRESS = pyparsing.Word(':' + pyparsing.hexnums)
  IPV6_ADDRESS.setParseAction(PyParseJoinList)

  # Common words.
  # A three letter word: an upper case letter followed by lower case letters.
  MONTH = pyparsing.Word(
      pyparsing.string.uppercase, pyparsing.string.lowercase, exact=3)

  # Define date structures.
  HYPHEN = pyparsing.Literal('-').suppress()

  YEAR = pyparsing.Word(pyparsing.nums, exact=4)
  YEAR.setParseAction(PyParseIntCast)

  TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2)
  TWO_DIGITS.setParseAction(PyParseIntCast)

  ONE_OR_TWO_DIGITS = pyparsing.Word(pyparsing.nums, min=1, max=2)
  ONE_OR_TWO_DIGITS.setParseAction(PyParseIntCast)

  # Year first date, e.g. 2012-01-31.
  DATE = pyparsing.Group(
      YEAR + pyparsing.Suppress('-') +
      TWO_DIGITS + pyparsing.Suppress('-') +
      TWO_DIGITS)

  # Year last date, e.g. 31-01-2012.
  DATE_REV = pyparsing.Group(
      TWO_DIGITS + pyparsing.Suppress('-') +
      TWO_DIGITS + pyparsing.Suppress('-') +
      YEAR)

  # Colon separated time, e.g. 12:34:56.
  TIME = pyparsing.Group(
      TWO_DIGITS + pyparsing.Suppress(':') +
      TWO_DIGITS + pyparsing.Suppress(':') +
      TWO_DIGITS)

  # A time followed by a dot and an integer fraction.
  TIME_MSEC = TIME + pyparsing.Suppress('.') + INTEGER
  DATE_TIME = DATE + TIME
  DATE_TIME_MSEC = DATE + TIME_MSEC

  # A hash character and everything up to the end of the line.
  COMMENT_LINE_HASH = pyparsing.Literal('#') + pyparsing.SkipTo(
      pyparsing.LineEnd())

  # TODO: Add more commonly used structs that can be used by parsers.
  PID = pyparsing.Word(pyparsing.nums, min=1, max=5)
  PID.setParseAction(PyParseIntCast)
 | 
						|
 | 
						|
 | 
						|
class PyparsingSingleLineTextParser(interface.BaseParser):
  """Single line text parser based on the pyparsing library."""

  # The actual structure, this needs to be defined by each parser.
  # This is defined as a list of tuples so that more than a single line
  # structure can be defined. That way the parser can support more than a
  # single type of log entry, despite them all having in common the constraint
  # that each log entry is a single line.
  # The tuple should have two entries, a key and a structure. This is done to
  # keep the structures in an order of priority/preference.
  # The key is a comment or an identification that is passed to the ParseRecord
  # function so that the developer can identify which structure got parsed.
  # The value is the actual pyparsing structure.
  LINE_STRUCTURES = []

  # In order for the tool to not read too much data into a buffer to evaluate
  # whether or not the parser is the right one for this file or not we
  # specifically define a maximum amount of bytes a single line can occupy. This
  # constant can be overwritten by implementations if their format might have a
  # longer line than 400 bytes.
  MAX_LINE_LENGTH = 400

  # Define an encoding. If a file is encoded using specific encoding it is
  # advised to include it here. If this class constant is set all lines will be
  # decoded prior to being sent to parsing by pyparsing, if not properly set it
  # could negatively affect parsing of the file.
  # If this value needs to be calculated on the fly (not a fixed constant for
  # this particular file type) it can be done by modifying the self.encoding
  # attribute.
  ENCODING = ''

  def __init__(self):
    """Initializes the pyparsing single-line text parser object."""
    super(PyparsingSingleLineTextParser, self).__init__()
    self.encoding = self.ENCODING
    self._current_offset = 0
    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = self.LINE_STRUCTURES

  def _ReadLine(
      self, parser_context, file_entry, text_file_object, max_len=0,
      quiet=False, depth=0):
    """Reads a single non-empty line from a text file.

    Lines that only consist of an end-of-line sequence are skipped.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      text_file_object: A text file object (instance of dfvfs.TextFile).
      max_len: If defined determines the maximum number of bytes a single line
               can take.
      quiet: If True then a decode warning is not displayed.
      depth: The number of empty lines already skipped; used as a threshold
             of how many newlines we can encounter before bailing out.

    Returns:
      A single line read from the file-like object, or the maximum number of
      characters (if max_len defined and line longer than the defined size),
      with leading and trailing whitespace removed. The line is decoded with
      self.encoding when that is set and the line can be decoded, otherwise
      the line is returned as read. None is returned when no more data could
      be read and an empty string when too many consecutive empty lines were
      encountered.
    """
    while True:
      if max_len:
        line = text_file_object.readline(max_len)
      else:
        line = text_file_object.readline()

      if not line:
        return None

      # If line is empty, skip it and go on.
      if line not in ('\n', '\r\n'):
        break

      # Max 40 new lines in a row before we bail out.
      if depth == 40:
        return ''

      depth += 1

    if not self.encoding:
      return line.strip()

    try:
      return line.decode(self.encoding).strip()
    except UnicodeDecodeError:
      if not quiet:
        logging.warning((
            u'Unable to decode line [{0:s}...] with encoding: {1:s} in '
            u'file: {2:s}').format(
                repr(line[:30]), self.encoding,
                parser_context.GetDisplayName(file_entry)))
      # Fall back to the line as it was read.
      return line.strip()

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extract data from a text file using a pyparsing definition.

    The first line is used to determine if the file is a text file with the
    expected structure. After that every line is matched against the line
    structures, in order, and the first structure that yields a result is
    passed to ParseRecord.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    # TODO: find a more elegant way for this; currently the mac_wifi and
    # syslog parser seem to rely on this member.
    self.file_entry = file_entry

    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    if not self._line_structures:
      raise errors.UnableToParseFile(
          u'Line structure undeclared, unable to proceed.')

    file_object = file_entry.GetFileObject()

    # Make sure the file object is closed, also when the file is rejected.
    try:
      file_object.seek(0, os.SEEK_SET)
      text_file_object = text_file.TextFile(file_object)

      line = self._ReadLine(
          parser_context, file_entry, text_file_object,
          max_len=self.MAX_LINE_LENGTH, quiet=True)
      if not line:
        raise errors.UnableToParseFile(u'Not a text file.')

      if len(line) in (self.MAX_LINE_LENGTH, self.MAX_LINE_LENGTH - 1):
        logging.debug((
            u'Trying to read a line and reached the maximum allowed length of '
            u'{0:d}. The last few bytes of the line are: {1:s} [parser '
            u'{2:s}]').format(
                self.MAX_LINE_LENGTH, repr(line[-10:]), self.NAME))

      if not utils.IsText(line):
        raise errors.UnableToParseFile(u'Not a text file, unable to proceed.')

      if not self.VerifyStructure(parser_context, line):
        raise errors.UnableToParseFile('Wrong file structure.')

      # Add ourselves to the parser chain, which will be used in all subsequent
      # event creation in this parser.
      parser_chain = self._BuildParserChain(parser_chain)

      # Set the offset to the beginning of the file.
      self._current_offset = 0
      # Read every line in the text file.
      while line:
        parsed_structure = None
        use_key = None
        # Try to parse the line using all the line structures. The structures
        # of the instance are used since these are the ones checked above.
        for key, structure in self._line_structures:
          try:
            parsed_structure = structure.parseString(line)
          except pyparsing.ParseException:
            continue

          if parsed_structure:
            use_key = key
            break

        if parsed_structure:
          parsed_event = self.ParseRecord(
              parser_context, use_key, parsed_structure)
          if parsed_event:
            parsed_event.offset = self._current_offset
            parser_context.ProduceEvent(
                parsed_event, parser_chain=parser_chain, file_entry=file_entry)
        else:
          # The line can be a raw byte string, hence repr is used to prevent
          # an implicit decode when it is formatted into a Unicode string.
          logging.warning(
              u'Unable to parse log line: {0:s}'.format(repr(line)))

        self._current_offset = text_file_object.get_offset()
        line = self._ReadLine(parser_context, file_entry, text_file_object)

    finally:
      file_object.close()

  @abc.abstractmethod
  def ParseRecord(self, parser_context, key, structure):
    """Parse a single extracted pyparsing structure.

    This function takes as an input a parsed pyparsing structure
    and produces an EventObject if possible from that structure.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      key: An identification string indicating the name of the parsed
           structure.
      structure: A pyparsing.ParseResults object from a line in the
                 log file.

    Returns:
      An event object (instance of EventObject) or None.
    """

  @abc.abstractmethod
  def VerifyStructure(self, parser_context, line):
    """Verify the structure of the file and return boolean based on that check.

    This function should read enough text from the text file to confirm
    that the file is the correct one for this particular parser.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      line: A single line from the text file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
 | 
						|
 | 
						|
 | 
						|
class EncodedTextReader(object):
  """Class to read simple encoded text.

  Raw data is read from a file-like object into an internal buffer, split
  into lines and, if an encoding was specified, decoded. The resulting text
  is made available via the lines attribute.
  """

  def __init__(self, buffer_size=2048, encoding=None):
    """Initializes the encoded text reader object.

    Args:
      buffer_size: optional buffer size. The default is 2048.
      encoding: optional encoding. The default is None.
    """
    super(EncodedTextReader, self).__init__()
    self._buffer_size = buffer_size
    self._current_offset = 0
    self._encoding = encoding

    # The end-of-line markers are kept in the same form as the raw data, so
    # that the raw data can be split before it is decoded.
    if self._encoding:
      self._new_line = u'\n'.encode(self._encoding)
      self._carriage_return = u'\r'.encode(self._encoding)
    else:
      self._new_line = '\n'
      self._carriage_return = '\r'

    self._new_line_length = len(self._new_line)
    self._carriage_return_length = len(self._carriage_return)

    # An empty raw buffer of the same type as the end-of-line markers.
    self._empty_buffer = self._new_line[:0]
    self._buffer = self._empty_buffer

    self.lines = u''

  def _ReadLine(self, file_object):
    """Reads a line from the file object.

    Args:
      file_object: the file-like object.

    Returns:
      A string containing the line, including the new line character if one
      was found. A carriage return directly before the new line is removed.
      If the line cannot be decoded the raw line is returned. An empty
      string is returned when no more data is available.
    """
    if len(self._buffer) < self._buffer_size:
      self._buffer += file_object.read(self._buffer_size)

    # If the buffer does not contain a new line, the whole buffer is returned
    # as the line and the buffer is left empty.
    line, new_line, self._buffer = self._buffer.partition(self._new_line)

    self._current_offset += len(line)

    # Strip carriage returns from the text.
    if line.endswith(self._carriage_return):
      line = line[:-self._carriage_return_length]

    if new_line:
      line += self._new_line
      self._current_offset += self._new_line_length

    # If a parser specifically indicates specific encoding we need
    # to handle the buffer as it is an encoded string.
    # If it fails we fail back to the original raw string.
    if self._encoding:
      try:
        line = line.decode(self._encoding)
      except UnicodeDecodeError:
        # TODO: it might be better to raise here.
        pass

    return line

  def ReadLine(self, file_object):
    """Reads a line.

    If the first line in the lines buffer is empty, the lines buffer is
    refilled and the next line is returned instead.

    Args:
      file_object: the file-like object.

    Returns:
      A single line read from the lines buffer, without the new line
      character.
    """
    line, _, self.lines = self.lines.partition('\n')
    if not line:
      self.ReadLines(file_object)
      line, _, self.lines = self.lines.partition('\n')

    return line

  def ReadLines(self, file_object):
    """Reads lines into the lines buffer.

    Lines are appended until roughly buffer size characters are buffered or
    no more data is available. Note that appending a raw line that could not
    be decoded to the Unicode lines buffer can raise an exception.

    Args:
      file_object: the file-like object.
    """
    lines_size = len(self.lines)
    if lines_size < self._buffer_size:
      lines_size = self._buffer_size - lines_size
      while lines_size > 0:
        line = self._ReadLine(file_object)
        if not line:
          break

        self.lines = u''.join([self.lines, line])
        lines_size -= len(line)

  def Reset(self):
    """Resets the encoded text reader."""
    self._buffer = self._empty_buffer
    self._current_offset = 0

    self.lines = u''

  def SkipAhead(self, file_object, number_of_characters):
    """Skips ahead a number of characters.

    Args:
      file_object: the file-like object.
      number_of_characters: the number of characters.
    """
    lines_size = len(self.lines)
    # Drop the whole lines buffer, and refill it, for as long as the number
    # of characters to skip covers all of it.
    while number_of_characters >= lines_size:
      number_of_characters -= lines_size

      self.lines = u''
      self.ReadLines(file_object)
      lines_size = len(self.lines)
      if lines_size == 0:
        return

    self.lines = self.lines[number_of_characters:]
 | 
						|
 | 
						|
 | 
						|
class PyparsingMultiLineTextParser(PyparsingSingleLineTextParser):
 | 
						|
  """Multi line text parser based on the pyparsing library."""
 | 
						|
 | 
						|
  BUFFER_SIZE = 2048
 | 
						|
 | 
						|
  def __init__(self):
 | 
						|
    """Initializes the pyparsing multi-line text parser object."""
 | 
						|
    super(PyparsingMultiLineTextParser, self).__init__()
 | 
						|
    self._buffer_size = self.BUFFER_SIZE
 | 
						|
    self._text_reader = EncodedTextReader(
 | 
						|
        buffer_size=self.BUFFER_SIZE, encoding=self.ENCODING)
 | 
						|
 | 
						|
  def Parse(self, parser_context, file_entry, parser_chain=None):
 | 
						|
    """Parse a text file using a pyparsing definition.
 | 
						|
 | 
						|
    Args:
 | 
						|
      parser_context: A parser context object (instance of ParserContext).
 | 
						|
      file_entry: A file entry object (instance of dfvfs.FileEntry).
 | 
						|
      parser_chain: Optional string containing the parsing chain up to this
 | 
						|
                    point. The default is None.
 | 
						|
 | 
						|
    Raises:
 | 
						|
      UnableToParseFile: if the line structures are missing.
 | 
						|
    """
 | 
						|
    if not self.LINE_STRUCTURES:
 | 
						|
      raise errors.UnableToParseFile(u'Missing line structures.')
 | 
						|
 | 
						|
    self._text_reader.Reset()
 | 
						|
 | 
						|
    file_object = file_entry.GetFileObject()
 | 
						|
    file_object.seek(0, os.SEEK_SET)
 | 
						|
 | 
						|
    try:
 | 
						|
      self._text_reader.ReadLines(file_object)
 | 
						|
    except UnicodeDecodeError as exception:
 | 
						|
      raise errors.UnableToParseFile(
 | 
						|
          u'Not a text file, with error: {0:s}'.format(exception))
 | 
						|
 | 
						|
    if not utils.IsText(self._text_reader.lines):
 | 
						|
      raise errors.UnableToParseFile(u'Not a text file, unable to proceed.')
 | 
						|
 | 
						|
    if not self.VerifyStructure(parser_context, self._text_reader.lines):
 | 
						|
      raise errors.UnableToParseFile(u'Wrong file structure.')
 | 
						|
 | 
						|
    # Add ourselves to the parser chain, which will be used in all subsequent
 | 
						|
    # event creation in this parser.
 | 
						|
    parser_chain = self._BuildParserChain(parser_chain)
 | 
						|
 | 
						|
    # Read every line in the text file.
 | 
						|
    while self._text_reader.lines:
 | 
						|
      # Initialize pyparsing objects.
 | 
						|
      tokens = None
 | 
						|
      start = 0
 | 
						|
      end = 0
 | 
						|
 | 
						|
      key = None
 | 
						|
 | 
						|
      # Try to parse the line using all the line structures.
 | 
						|
      for key, structure in self.LINE_STRUCTURES:
 | 
						|
        try:
 | 
						|
          parsed_structure = next(
 | 
						|
              structure.scanString(self._text_reader.lines, maxMatches=1), None)
 | 
						|
        except pyparsing.ParseException:
 | 
						|
          continue
 | 
						|
 | 
						|
        if not parsed_structure:
 | 
						|
          continue
 | 
						|
 | 
						|
        tokens, start, end = parsed_structure
 | 
						|
 | 
						|
        # Only want to parse the structure if it starts
 | 
						|
        # at the beginning of the buffer.
 | 
						|
        if start == 0:
 | 
						|
          break
 | 
						|
 | 
						|
      if tokens and start == 0:
 | 
						|
        parsed_event = self.ParseRecord(parser_context, key, tokens)
 | 
						|
        if parsed_event:
 | 
						|
          # TODO: need a reliable way to handle this.
 | 
						|
          # parsed_event.offset = self._text_reader.line_offset
 | 
						|
          parser_context.ProduceEvent(
 | 
						|
              parsed_event, parser_chain=parser_chain, file_entry=file_entry)
 | 
						|
 | 
						|
        self._text_reader.SkipAhead(file_object, end)
 | 
						|
 | 
						|
      else:
 | 
						|
        odd_line = self._text_reader.ReadLine(file_object)
 | 
						|
        if odd_line:
 | 
						|
          logging.warning(
 | 
						|
              u'Unable to parse log line: {0:s}'.format(repr(odd_line)))
 | 
						|
 | 
						|
      try:
 | 
						|
        self._text_reader.ReadLines(file_object)
 | 
						|
      except UnicodeDecodeError as exception:
 | 
						|
        logging.error(
 | 
						|
            u'[{0:s}] Unable to read lines from file: {1:s} with error: '
 | 
						|
            u'{2:s}'.format(
 | 
						|
                parser_chain,
 | 
						|
                file_entry.path_spec.comparable.replace(u'\n', u';'),
 | 
						|
                exception))
 |