plaso-rubanetra/plaso/lib/output.py
2020-04-06 18:48:34 +02:00

395 lines
13 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the interface for output parsing of plaso.
The default output or storage mechanism of plaso is not in a human
readable format, so there needs to be a way to define how that output
is presented.
After the timeline is collected and stored another tool can read, filter,
sort and process the output inside the storage, and send each processed
entry to an output formatter that takes care of parsing the output into
a human readable format for easy human consumption/analysis.
"""
import abc
import logging
import sys
from plaso.lib import errors
from plaso.lib import registry
from plaso.lib import utils
import pytz
class LogOutputFormatter(object):
  """Base class for the output modules that render plaso events.

  The class mostly documents the callback interface. Concrete output
  modules override the hooks they care about: Start/End are called once
  around the whole output, StartEvent/EventBody/EndEvent around each event.
  """

  __metaclass__ = registry.MetaclassRegistry
  # Name-mangled per class. NOTE(review): presumably read by
  # registry.MetaclassRegistry to skip abstract classes - confirm there.
  __abstract = True

  # Optional arguments an output module wants added to the argument parser.
  # Each entry is a tuple of the flag and a dict of keyword arguments, e.g.:
  #   ARGUMENTS = [('--myparameter', {
  #       'action': 'store',
  #       'help': 'This is my parameter help',
  #       'dest': 'myparameter',
  #       'default': '',
  #       'type': 'unicode'})]
  #
  # Every key in the dict translates directly into an argparse parser
  # argument.
  ARGUMENTS = []

  def __init__(self, store, filehandle=sys.stdout, config=None,
               filter_use=None):
    """Initializes the output module.

    Args:
      store: A StorageFile object that defines the storage.
      filehandle: A file-like object that can be written to.
      config: The configuration object. Its optional "timezone" and
              "preferred_encoding" attributes are read, falling back to
              'UTC' and 'utf-8' when absent.
      filter_use: A filter_interface.FilterObject object.
    """
    timezone_name = getattr(config, 'timezone', 'UTC')
    try:
      timezone = pytz.timezone(timezone_name)
    except pytz.UnknownTimeZoneError:
      logging.warning(u'Unkown timezone: {0:s} defaulting to: UTC'.format(
          timezone_name))
      timezone = pytz.utc
    self.zone = timezone

    self.store = store
    self.filehandle = filehandle
    self._config = config
    self._filter = filter_use
    self.encoding = getattr(config, 'preferred_encoding', 'utf-8')

  # TODO: this function seems to be only called with the default arguments,
  # so refactor this function away.
  def FetchEntry(self, store_number=-1, store_index=-1):
    """Fetches an event object from the storage.

    The next sorted entry is returned unless an explicit location is given.

    Args:
      store_number: The store number to read from. Only honoured when it
                    is greater than zero.
      store_index: The index into that store, used together with
                   store_number.

    Returns:
      An EventObject, either the next one or the one at the given location.
    """
    if store_number > 0:
      return self.store.GetEventObject(store_number, store_index)
    return self.store.GetSortedEntry()

  def WriteEvent(self, evt):
    """Writes a single event to the output.

    The event is emitted in three steps: StartEvent, then EventBody with the
    event itself, then EndEvent.

    Args:
      evt: An EventObject, defined in the event library.
    """
    self.StartEvent()
    self.EventBody(evt)
    self.EndEvent()

  @abc.abstractmethod
  def EventBody(self, evt):
    """Writes the main body of an event to the output filehandle.

    Marked abstract; the base class provides no implementation.

    Args:
      evt: An EventObject, defined in the event library.
    """

  def StartEvent(self):
    """Hook called before the body of each event is written.

    Intended for per-event preamble, for instance an opening XML tag.
    The base implementation does nothing.
    """

  def EndEvent(self):
    """Hook called after the body of each event has been written.

    Intended for per-event postamble, for instance a closing XML tag.
    The base implementation does nothing.
    """

  def Start(self):
    """Hook called once before any event is written.

    Output formats that need a header should produce it here.
    The base implementation does nothing.
    """

  def End(self):
    """Hook called once after the last event has been written.

    Output formats that need a footer should produce it here.
    The base implementation does nothing.
    """
# The pylint warning below is suppressed because these classes intentionally
# do not implement the abstract method EventBody; classes that inherit from
# one of them need to implement it.
# pylint: disable=abstract-method
class FileLogOutputFormatter(LogOutputFormatter):
  """A simple file based output formatter.

  Wraps the output destination in an OutputFilehandle so that subclasses
  can write unicode lines without handling the encoding themselves.
  """

  __abstract = True

  def __init__(self, store, filehandle=sys.stdout, config=None,
               filter_use=None):
    """Sets up the formatter and wraps the output destination.

    Args:
      store: A StorageFile object that defines the storage.
      filehandle: Either a path (string), which is opened for binary
                  writing, or an object that has a "write" attribute.
      config: The configuration object, containing config information.
      filter_use: A filter_interface.FilterObject object.

    Raises:
      IOError: if filehandle is neither a string nor has a "write" attribute.
    """
    super(FileLogOutputFormatter, self).__init__(
        store, filehandle, config, filter_use)

    is_path = isinstance(filehandle, basestring)
    if not is_path and not hasattr(filehandle, 'write'):
      raise IOError(
          u'Unable to determine how to use filehandle passed in: {}'.format(
              type(filehandle)))

    raw_handle = open(filehandle, 'wb') if is_path else filehandle

    # Replace the raw handle stored by the parent constructor with the
    # encoding aware wrapper.
    self.filehandle = OutputFilehandle(self.encoding)
    self.filehandle.Open(raw_handle)

  def End(self):
    """Runs the parent End hook and then closes the wrapped filehandle."""
    super(FileLogOutputFormatter, self).End()
    self.filehandle.Close()
class EventBuffer(object):
  """Buffer class for EventObject output processing.

  When duplicate checking is enabled, events that share a timestamp are
  held back until an event with a different timestamp is appended (or the
  buffer is ended). Events with the same equality string are joined into
  one event before being handed to the formatter.
  """

  # Attributes whose values are combined when two duplicates are joined.
  MERGE_ATTRIBUTES = ['inode', 'filename', 'display_name']

  def __init__(self, formatter, check_dedups=True):
    """Initializes the EventBuffer and calls the formatter Start hook.

    This class is used for buffering up events for duplicate removals
    and for other post-processing/analysis of events before being presented
    by the appropriate output module.

    Args:
      formatter: An OutputFormatter object.
      check_dedups: Boolean value indicating whether or not the buffer should
                    check and merge duplicate entries or not.
    """
    self._buffer_dict = {}
    self._current_timestamp = 0
    self.duplicate_counter = 0
    self.check_dedups = check_dedups
    self.formatter = formatter
    self.formatter.Start()

  def Append(self, event_object):
    """Appends an EventObject into the processing pipeline.

    Without duplicate checking the event is written immediately. Otherwise
    it is buffered under its equality string; a change of timestamp flushes
    what was buffered so far.

    Args:
      event_object: The EventObject that is being added.
    """
    if not self.check_dedups:
      self.formatter.WriteEvent(event_object)
      return

    if event_object.timestamp != self._current_timestamp:
      self._current_timestamp = event_object.timestamp
      self.Flush()

    key = event_object.EqualityString()
    if key in self._buffer_dict:
      # The new event absorbs the buffered one and takes its place.
      self.JoinEvents(event_object, self._buffer_dict.pop(key))
    self._buffer_dict[key] = event_object

  def Flush(self):
    """Flushes the buffer by sending every buffered event to the formatter.

    A WrongFormatter error for one event is logged and does not prevent the
    remaining events from being written.
    """
    if not self._buffer_dict:
      return

    for event_object in self._buffer_dict.values():
      try:
        self.formatter.WriteEvent(event_object)
      except errors.WrongFormatter as exception:
        logging.error(u'Unable to write event: {:s}'.format(exception))

    self._buffer_dict = {}

  @staticmethod
  def _MergeValues(value_a, value_b):
    """Merges two ';' separated strings into one sorted, de-duplicated string.

    Empty fragments are dropped so that merging with an empty or missing
    value does not introduce a stray ';' separator.

    Args:
      value_a: The first ';' separated string.
      value_b: The second ';' separated string.

    Returns:
      A unicode string with the unique, sorted fragments joined by ';'.
    """
    fragments = set(value_a.split(u';')) | set(value_b.split(u';'))
    fragments.discard(u'')
    # Keeping the order consistent across runs helps with diffs.
    return u';'.join(sorted(fragments))

  def JoinEvents(self, event_a, event_b):
    """Joins event_b into event_a and counts the duplicate.

    The attributes listed in MERGE_ATTRIBUTES are combined on event_a. For
    events whose parser is 'filestat' the timestamp description is combined
    as well, unless that of event_b is already contained in that of event_a.

    Args:
      event_a: The EventObject that is kept and updated in place.
      event_b: The duplicate EventObject whose values are merged in.
    """
    self.duplicate_counter += 1
    # TODO: Currently we are using the first event pathspec, perhaps that
    # is not the best approach. There is no need to have all the pathspecs
    # inside the combined event, however which one should be chosen is
    # perhaps something that can be evaluated here (regular TSK in favor of
    # an event stored deep inside a VSS for instance).
    for attr in self.MERGE_ATTRIBUTES:
      value_a = utils.GetUnicodeString(getattr(event_a, attr, ''))
      value_b = utils.GetUnicodeString(getattr(event_b, attr, ''))
      setattr(event_a, attr, self._MergeValues(value_a, value_b))

    # Special instance if this is a filestat entry we need to combine the
    # description field.
    if getattr(event_a, 'parser', u'') == 'filestat':
      # Read both descriptions with a default so that an event without a
      # timestamp_desc attribute does not raise AttributeError.
      description_a = getattr(event_a, 'timestamp_desc', u'')
      description_b = getattr(event_b, 'timestamp_desc', u'')
      if description_b not in description_a:
        setattr(
            event_a, 'timestamp_desc',
            self._MergeValues(description_a, description_b))

  def End(self):
    """Flushes the buffer and calls the formatter to produce the closing line."""
    self.Flush()
    if self.formatter:
      self.formatter.End()

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.End()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self
class OutputFilehandle(object):
  """A simple wrapper for filehandles to make character encoding easier.

  All data is stored as unicode text internally, which causes issues when
  that text is written to a non-unicode terminal. This wrapper encodes each
  line before writing it: with the preferred encoding when the handle is
  considered to be standard output, with DEFAULT_ENCODING otherwise.

  A handle is considered to be standard output when it has no "name"
  attribute or when its name starts with '<stdout>'.
  """

  # Encoding used for everything that is not considered standard output.
  DEFAULT_ENCODING = 'utf-8'

  def __init__(self, encoding='utf-8'):
    """Initializes the output file handler.

    Args:
      encoding: The default terminal encoding, only used if attempted to write
                to the terminal.
    """
    self._filehandle = None
    self._encoding = encoding

    # An attribute stating whether or not this is STDOUT.
    self._standard_out = False

  def Open(self, filehandle=sys.stdout, path=''):
    """Opens a filehandle to an output file.

    Args:
      filehandle: A file-like object that is used to write data to.
      path: If a file-like object is not passed in it is possible to pass
            in a path to a file, and a file-like object will be created.
            A non-empty path takes precedence over filehandle.
    """
    if path:
      self._filehandle = open(path, 'wb')
    else:
      self._filehandle = filehandle

    # Recompute the flag on every call, so that an object that is reused for
    # a different destination does not inherit the state of the previous one.
    if not hasattr(self._filehandle, 'name'):
      self._standard_out = True
    else:
      self._standard_out = self._filehandle.name.startswith('<stdout>')

  def WriteLine(self, line):
    """Encodes a single line and writes it to the supplied filehandle.

    Nothing is written when no filehandle is set.

    Args:
      line: The unicode string to write.
    """
    if not self._filehandle:
      return

    if self._standard_out:
      # Write using preferred user encoding.
      try:
        self._filehandle.write(line.encode(self._encoding))
      except UnicodeEncodeError:
        logging.error(
            u'Unable to properly write logline, save output to a file to '
            u'prevent missing data.')
        # Best effort: drop the characters that cannot be encoded.
        self._filehandle.write(line.encode(self._encoding, 'ignore'))

    else:
      # Write to a file, use unicode.
      self._filehandle.write(line.encode(self.DEFAULT_ENCODING))

  def Close(self):
    """Closes the filehandle, unless it is considered standard output."""
    if self._filehandle and not self._standard_out:
      self._filehandle.close()

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.Close()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self
def GetOutputFormatter(output_string):
  """Returns the output formatter class that matches the provided string.

  The lookup is case insensitive: the string is normalized to an uppercase
  first character followed by lowercase characters before it is looked up
  in LogOutputFormatter.classes.

  Args:
    output_string: The name of the output formatter.

  Returns:
    The matching formatter class, or None if output_string is not a string,
    is empty or does not match any registered formatter.
  """
  # An empty string has no first character to capitalize and can never
  # match a formatter, so it is rejected together with non-string input.
  if not isinstance(output_string, basestring) or not output_string:
    return None

  # Format the output string (make the input case in-sensitive).
  format_str = ''.join(
      [output_string[0].upper(), output_string[1:].lower()])
  return LogOutputFormatter.classes.get(format_str, None)
def ListOutputFormatters():
  """Generates a list of all available output formatters.

  The docstring is read from the formatter class itself, so no formatter
  is constructed just to list it.

  Yields:
    A tuple of the name the formatter is registered under in
    LogOutputFormatter.classes and the first line of its docstring (an
    empty string if the class has no docstring).
  """
  for name, formatter_class in LogOutputFormatter.classes.items():
    doc_string, _, _ = (formatter_class.__doc__ or '').partition('\n')
    yield name, doc_string