Import from old repository

Stefan
2020-04-06 18:48:34 +02:00
commit 0da6783a45
762 changed files with 103065 additions and 0 deletions
@@ -0,0 +1,16 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
(Diff of a new 1,693-line file suppressed because it is too large.)
@@ -0,0 +1,279 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the front-end object."""
import os
import unittest
from dfvfs.lib import definitions as dfvfs_definitions
from plaso.frontend import frontend
from plaso.frontend import test_lib
from plaso.lib import errors
from plaso.lib import storage
class ExtractionFrontendTests(test_lib.FrontendTestCase):
"""Tests for the extraction front-end object."""
def _TestScanSourceDirectory(self, test_file):
"""Tests the ScanSource function on a directory.
Args:
test_file: the path of the test file.
"""
test_front_end = frontend.ExtractionFrontend(
self._input_reader, self._output_writer)
options = test_lib.Options()
options.source = test_file
test_front_end.ParseOptions(options)
test_front_end.ScanSource(options)
path_spec = test_front_end.GetSourcePathSpec()
self.assertNotEquals(path_spec, None)
self.assertEquals(path_spec.location, os.path.abspath(test_file))
self.assertEquals(
path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_OS)
# pylint: disable=protected-access
self.assertEquals(test_front_end._partition_offset, None)
def _TestScanSourceImage(self, test_file):
"""Tests the ScanSource function on the test image.
Args:
test_file: the path of the test file.
"""
test_front_end = frontend.ExtractionFrontend(
self._input_reader, self._output_writer)
options = test_lib.Options()
options.source = test_file
test_front_end.ParseOptions(options)
test_front_end.ScanSource(options)
path_spec = test_front_end.GetSourcePathSpec()
self.assertNotEquals(path_spec, None)
self.assertEquals(
path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK)
# pylint: disable=protected-access
self.assertEquals(test_front_end._partition_offset, 0)
def _TestScanSourcePartitionedImage(self, test_file):
"""Tests the ScanSource function on the partitioned test image.
Args:
test_file: the path of the test file.
"""
test_front_end = frontend.ExtractionFrontend(
self._input_reader, self._output_writer)
options = test_lib.Options()
options.source = test_file
options.image_offset_bytes = 0x0002c000
test_front_end.ParseOptions(options)
test_front_end.ScanSource(options)
path_spec = test_front_end.GetSourcePathSpec()
self.assertNotEquals(path_spec, None)
self.assertEquals(
path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK)
# pylint: disable=protected-access
self.assertEquals(test_front_end._partition_offset, 180224)
options = test_lib.Options()
options.source = test_file
options.image_offset = 352
options.bytes_per_sector = 512
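    # Editor's note: 352 sectors * 512 bytes per sector = 180224 bytes,
    # i.e. 0x0002c000, so this sector-based form selects the same partition
    # offset as the image_offset_bytes form above.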
test_front_end.ParseOptions(options)
test_front_end.ScanSource(options)
path_spec = test_front_end.GetSourcePathSpec()
self.assertNotEquals(path_spec, None)
self.assertEquals(
path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK)
# pylint: disable=protected-access
self.assertEquals(test_front_end._partition_offset, 180224)
options = test_lib.Options()
options.source = test_file
options.partition_number = 2
test_front_end.ParseOptions(options)
test_front_end.ScanSource(options)
path_spec = test_front_end.GetSourcePathSpec()
self.assertNotEquals(path_spec, None)
self.assertEquals(
path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK)
# pylint: disable=protected-access
self.assertEquals(test_front_end._partition_offset, 180224)
def _TestScanSourceVssImage(self, test_file):
"""Tests the ScanSource function on the VSS test image.
Args:
test_file: the path of the test file.
"""
test_front_end = frontend.ExtractionFrontend(
self._input_reader, self._output_writer)
options = test_lib.Options()
options.source = test_file
options.vss_stores = '1,2'
test_front_end.ParseOptions(options)
test_front_end.ScanSource(options)
path_spec = test_front_end.GetSourcePathSpec()
self.assertNotEquals(path_spec, None)
self.assertEquals(
path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK)
# pylint: disable=protected-access
self.assertEquals(test_front_end._partition_offset, 0)
self.assertEquals(test_front_end._vss_stores, [1, 2])
options = test_lib.Options()
options.source = test_file
options.vss_stores = '1'
test_front_end.ParseOptions(options)
test_front_end.ScanSource(options)
path_spec = test_front_end.GetSourcePathSpec()
self.assertNotEquals(path_spec, None)
self.assertEquals(
path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK)
# pylint: disable=protected-access
self.assertEquals(test_front_end._partition_offset, 0)
self.assertEquals(test_front_end._vss_stores, [1])
options = test_lib.Options()
options.source = test_file
options.vss_stores = 'all'
test_front_end.ParseOptions(options)
test_front_end.ScanSource(options)
path_spec = test_front_end.GetSourcePathSpec()
self.assertNotEquals(path_spec, None)
self.assertEquals(
path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK)
# pylint: disable=protected-access
self.assertEquals(test_front_end._partition_offset, 0)
self.assertEquals(test_front_end._vss_stores, [1, 2])
def setUp(self):
"""Sets up the objects used throughout the test."""
self._input_reader = frontend.StdinFrontendInputReader()
self._output_writer = frontend.StdoutFrontendOutputWriter()
def testParseOptions(self):
"""Tests the parse options function."""
test_front_end = frontend.ExtractionFrontend(
self._input_reader, self._output_writer)
options = test_lib.Options()
with self.assertRaises(errors.BadConfigOption):
test_front_end.ParseOptions(options)
options.source = self._GetTestFilePath([u'ímynd.dd'])
test_front_end.ParseOptions(options)
def testScanSource(self):
"""Tests the ScanSource function."""
test_file = self._GetTestFilePath([u'tsk_volume_system.raw'])
self._TestScanSourcePartitionedImage(test_file)
test_file = self._GetTestFilePath([u'image-split.E01'])
self._TestScanSourcePartitionedImage(test_file)
test_file = self._GetTestFilePath([u'image.E01'])
self._TestScanSourceImage(test_file)
test_file = self._GetTestFilePath([u'image.qcow2'])
self._TestScanSourceImage(test_file)
test_file = self._GetTestFilePath([u'vsstest.qcow2'])
self._TestScanSourceVssImage(test_file)
test_file = self._GetTestFilePath([u'text_parser'])
self._TestScanSourceDirectory(test_file)
test_file = self._GetTestFilePath([u'image.vhd'])
self._TestScanSourceImage(test_file)
test_file = self._GetTestFilePath([u'image.vmdk'])
self._TestScanSourceImage(test_file)
with self.assertRaises(errors.SourceScannerError):
test_file = self._GetTestFilePath(['nosuchfile.raw'])
self._TestScanSourceImage(test_file)
class AnalysisFrontendTests(test_lib.FrontendTestCase):
"""Tests for the analysis front-end object."""
def setUp(self):
"""Sets up the objects used throughout the test."""
self._input_reader = frontend.StdinFrontendInputReader()
self._output_writer = frontend.StdoutFrontendOutputWriter()
def testOpenStorageFile(self):
"""Tests the open storage file function."""
test_front_end = frontend.AnalysisFrontend(
self._input_reader, self._output_writer)
options = test_lib.Options()
options.storage_file = self._GetTestFilePath([u'psort_test.out'])
test_front_end.ParseOptions(options)
storage_file = test_front_end.OpenStorageFile()
self.assertIsInstance(storage_file, storage.StorageFile)
storage_file.Close()
def testParseOptions(self):
"""Tests the parse options function."""
test_front_end = frontend.AnalysisFrontend(
self._input_reader, self._output_writer)
options = test_lib.Options()
with self.assertRaises(errors.BadConfigOption):
test_front_end.ParseOptions(options)
options.storage_file = self._GetTestFilePath([u'no_such_file.out'])
with self.assertRaises(errors.BadConfigOption):
test_front_end.ParseOptions(options)
options.storage_file = self._GetTestFilePath([u'psort_test.out'])
test_front_end.ParseOptions(options)
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,700 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The image export front-end."""
import argparse
import collections
import hashlib
import logging
import os
import sys
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.artifacts import knowledge_base
from plaso.engine import collector
from plaso.engine import utils as engine_utils
from plaso.engine import queue
from plaso.engine import single_process
from plaso.frontend import frontend
from plaso.frontend import utils as frontend_utils
from plaso.lib import errors
from plaso.lib import timelib
from plaso.preprocessors import interface as preprocess_interface
from plaso.preprocessors import manager as preprocess_manager
def CalculateHash(file_object):
"""Return a hash for a given file object."""
md5 = hashlib.md5()
file_object.seek(0)
data = file_object.read(4098)
while data:
md5.update(data)
data = file_object.read(4098)
return md5.hexdigest()
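# Editor's sketch (not part of the original module): CalculateHash reads the
# file object in fixed-size chunks, so arbitrarily large files are hashed
# without being loaded into memory at once. A hypothetical in-memory file
# object illustrates the call:
#
#   import io
#   print CalculateHash(io.BytesIO(b'plaso' * 1024))
#   # prints the 32 character MD5 hex digest of the data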
class DateFilter(object):
"""Class that implements a date filter for file entries."""
DATE_FILTER_INSTANCE = collections.namedtuple(
'date_filter_instance', 'type start end')
DATE_FILTER_TYPES = frozenset([
u'atime', u'bkup', u'ctime', u'crtime', u'dtime', u'mtime'])
def __init__(self):
"""Initialize the date filter object."""
super(DateFilter, self).__init__()
self._filters = []
@property
def number_of_filters(self):
"""Return back the filter count."""
return len(self._filters)
def Add(self, filter_type, filter_start=None, filter_end=None):
"""Add a date filter.
Args:
filter_type: String that defines what timestamp is affected by the
date filter, valid values are atime, ctime, crtime,
dtime, bkup and mtime.
filter_start: Optional start date of the filter. This is a string
in the form of "YYYY-MM-DD HH:MM:SS", or "YYYY-MM-DD".
If not supplied there will be no limitation to the initial
timeframe.
filter_end: Optional end date of the filter. This is a string
in the form of "YYYY-MM-DD HH:MM:SS", or "YYYY-MM-DD".
If not supplied there will be no limitation to the initial
timeframe.
Raises:
errors.WrongFilterOption: If the filter is badly formed.
"""
if not isinstance(filter_type, basestring):
raise errors.WrongFilterOption(u'Filter type must be a string.')
if filter_start is None and filter_end is None:
raise errors.WrongFilterOption(
u'A date filter has to have either a start or an end date.')
filter_type_lower = filter_type.lower()
if filter_type_lower not in self.DATE_FILTER_TYPES:
raise errors.WrongFilterOption(u'Unknown filter type: {0:s}.'.format(
filter_type))
date_filter_type = filter_type_lower
date_filter_start = None
date_filter_end = None
if filter_start is not None:
# If the date string is invalid the timestamp will be set to zero,
# which is also a valid date. Thus all invalid timestamp strings
# will be set to filter from the POSIX epoch time.
      # Therefore the actual value of the filter is printed out so that the user
# may catch this potentially unwanted behavior.
date_filter_start = timelib.Timestamp.FromTimeString(filter_start)
logging.info(
u'Date filter for start date configured: [{0:s}] {1:s}'.format(
date_filter_type,
timelib.Timestamp.CopyToIsoFormat(date_filter_start)))
if filter_end is not None:
date_filter_end = timelib.Timestamp.FromTimeString(filter_end)
logging.info(
u'Date filter for end date configured: [{0:s}] {1:s}'.format(
date_filter_type,
timelib.Timestamp.CopyToIsoFormat(date_filter_end)))
# Make sure that the end timestamp occurs after the beginning.
# If not then we need to reverse the time range.
if (date_filter_start is not None and
date_filter_start > date_filter_end):
temporary_placeholder = date_filter_end
date_filter_end = date_filter_start
date_filter_start = temporary_placeholder
self._filters.append(self.DATE_FILTER_INSTANCE(
date_filter_type, date_filter_start, date_filter_end))
def CompareFileEntry(self, file_entry):
"""Compare the set date filters against timestamps of a file entry.
Args:
file_entry: The file entry (instance of dfvfs.FileEntry).
    Returns:
      True if there are no date filters set. Otherwise the date filters are
      compared and True is only returned if the timestamps are within the
      time range.
Raises:
errors.WrongFilterOption: If an attempt is made to filter against
a date type that is not stored in the stat
object.
"""
if not self._filters:
return True
# Compare timestamps of the file entry.
stat = file_entry.GetStat()
# Go over each filter.
for date_filter in self._filters:
posix_time = getattr(stat, date_filter.type, None)
if posix_time is None:
# Trying to filter against a date type that is not saved in the stat
# object.
raise errors.WrongFilterOption(
u'Date type: {0:s} is not stored in the file entry'.format(
date_filter.type))
timestamp = timelib.Timestamp.FromPosixTime(posix_time)
if date_filter.start is not None and (timestamp < date_filter.start):
logging.debug((
u'[skipping] Not saving file: {0:s}, timestamp out of '
u'range.').format(file_entry.path_spec.location))
return False
if date_filter.end is not None and (timestamp > date_filter.end):
logging.debug((
u'[skipping] Not saving file: {0:s}, timestamp out of '
u'range.').format(file_entry.path_spec.location))
return False
return True
def Remove(self, filter_type, filter_start=None, filter_end=None):
"""Remove a date filter from the set of defined date filters.
Args:
filter_type: String that defines what timestamp is affected by the
date filter, valid values are atime, ctime, crtime,
dtime, bkup and mtime.
filter_start: Optional start date of the filter. This is a string
in the form of "YYYY-MM-DD HH:MM:SS", or "YYYY-MM-DD".
If not supplied there will be no limitation to the initial
timeframe.
filter_end: Optional end date of the filter. This is a string
in the form of "YYYY-MM-DD HH:MM:SS", or "YYYY-MM-DD".
If not supplied there will be no limitation to the initial
timeframe.
"""
if not self._filters:
return
# TODO: Instead of doing it this way calculate a hash for every filter
# that is stored and use that for removals.
for date_filter_index, date_filter in enumerate(self._filters):
if filter_start is None:
date_filter_start = filter_start
else:
date_filter_start = timelib.Timestamp.FromTimeString(filter_start)
if filter_end is None:
date_filter_end = filter_end
else:
date_filter_end = timelib.Timestamp.FromTimeString(filter_end)
if (date_filter.type == filter_type and
date_filter.start == date_filter_start and
date_filter.end == date_filter_end):
del self._filters[date_filter_index]
return
def Reset(self):
"""Resets the date filter."""
self._filters = []
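# Editor's sketch (not part of the original module): a DateFilter is built
# from one or more filter type/start/end triples and then compared against
# file entries. The dates below are hypothetical and file_entry is assumed
# to be a dfvfs.FileEntry obtained elsewhere.
#
#   date_filter = DateFilter()
#   date_filter.Add(
#       filter_type='ctime', filter_start='2013-01-01',
#       filter_end='2013-02-23 23:12:14')
#   if date_filter.CompareFileEntry(file_entry):
#     # The entry is within every configured time range; save it.
#     pass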
class FileSaver(object):
"""A simple class that is used to save files."""
md5_dict = {}
calc_md5 = False
# TODO: Move this functionality into the frontend as a state attribute.
_date_filter = None
@classmethod
def SetDateFilter(cls, date_filter):
"""Set a date filter for the file saver.
If a date filter is set files will not be saved unless they are within
the time boundaries.
Args:
date_filter: A date filter object (instance of DateFilter).
"""
cls._date_filter = date_filter
@classmethod
def WriteFile(cls, source_path_spec, destination_path, filename_prefix=''):
"""Writes the contents of the source to the destination file.
Args:
source_path_spec: the path specification of the source file.
destination_path: the path of the destination file.
filename_prefix: optional prefix for the filename. The default is an
empty string.
"""
file_entry = path_spec_resolver.Resolver.OpenFileEntry(source_path_spec)
directory = u''
filename = getattr(source_path_spec, 'location', None)
if not filename:
filename = source_path_spec.file_path
# There will be issues on systems that use a different separator than a
# forward slash. However a forward slash is always used in the pathspec.
if os.path.sep != u'/':
filename = filename.replace(u'/', os.path.sep)
if os.path.sep in filename:
directory_string, _, filename = filename.rpartition(os.path.sep)
if directory_string:
directory = os.path.join(
destination_path, *directory_string.split(os.path.sep))
if filename_prefix:
extracted_filename = u'{0:s}_{1:s}'.format(filename_prefix, filename)
else:
extracted_filename = filename
while extracted_filename.startswith(os.path.sep):
extracted_filename = extracted_filename[1:]
if directory:
if not os.path.isdir(directory):
os.makedirs(directory)
else:
directory = destination_path
if cls.calc_md5:
stat = file_entry.GetStat()
inode = getattr(stat, 'ino', 0)
file_object = file_entry.GetFileObject()
md5sum = CalculateHash(file_object)
if inode in cls.md5_dict:
if md5sum in cls.md5_dict[inode]:
return
cls.md5_dict[inode].append(md5sum)
else:
cls.md5_dict[inode] = [md5sum]
# Check if we do not want to save the file.
if cls._date_filter and not cls._date_filter.CompareFileEntry(file_entry):
return
try:
file_object = file_entry.GetFileObject()
frontend_utils.OutputWriter.WriteFile(
file_object, os.path.join(directory, extracted_filename))
except IOError as exception:
logging.error(
u'[skipping] unable to save file: {0:s} with error: {1:s}'.format(
filename, exception))
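# Editor's sketch (not part of the original module): FileSaver.WriteFile
# takes a dfvfs path specification, mirroring the construction used in the
# accompanying tests. The source location and destination are hypothetical.
#
#   os_path_spec = path_spec_factory.Factory.NewPathSpec(
#       dfvfs_definitions.TYPE_INDICATOR_OS, location=u'/tmp/evidence.txt')
#   FileSaver.calc_md5 = True
#   FileSaver.WriteFile(os_path_spec, u'/tmp/export')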
class ImageExtractorQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements an image extractor queue consumer."""
def __init__(self, process_queue, extensions, destination_path):
"""Initializes the image extractor queue consumer.
Args:
process_queue: the process queue (instance of Queue).
extensions: a list of extensions.
destination_path: the path where the extracted files should be stored.
"""
super(ImageExtractorQueueConsumer, self).__init__(process_queue)
self._destination_path = destination_path
self._extensions = extensions
def _ConsumeItem(self, path_spec):
"""Consumes an item callback for ConsumeItems.
Args:
path_spec: a path specification (instance of dfvfs.PathSpec).
"""
# TODO: move this into a function of path spec e.g. GetExtension().
location = getattr(path_spec, 'location', None)
if not location:
location = path_spec.file_path
_, _, extension = location.rpartition('.')
if extension.lower() in self._extensions:
vss_store_number = getattr(path_spec, 'vss_store_number', None)
if vss_store_number is not None:
filename_prefix = 'vss_{0:d}'.format(vss_store_number + 1)
else:
filename_prefix = ''
FileSaver.WriteFile(
path_spec, self._destination_path, filename_prefix=filename_prefix)
class ImageExportFrontend(frontend.StorageMediaFrontend):
"""Class that implements the image export front-end."""
def __init__(self):
"""Initializes the front-end object."""
input_reader = frontend.StdinFrontendInputReader()
output_writer = frontend.StdoutFrontendOutputWriter()
super(ImageExportFrontend, self).__init__(input_reader, output_writer)
self._knowledge_base = None
self._remove_duplicates = True
self._source_path_spec = None
# TODO: merge with collector and/or engine.
def _ExtractWithExtensions(self, extensions, destination_path):
"""Extracts files using extensions.
Args:
extensions: a list of extensions.
destination_path: the path where the extracted files should be stored.
"""
logging.info(u'Finding files with extensions: {0:s}'.format(extensions))
if not os.path.isdir(destination_path):
os.makedirs(destination_path)
input_queue = single_process.SingleProcessQueue()
# TODO: add support to handle multiple partitions.
self._source_path_spec = self.GetSourcePathSpec()
image_collector = collector.Collector(
input_queue, self._source_path, self._source_path_spec)
image_collector.Collect()
FileSaver.calc_md5 = self._remove_duplicates
input_queue_consumer = ImageExtractorQueueConsumer(
input_queue, extensions, destination_path)
input_queue_consumer.ConsumeItems()
# TODO: merge with collector and/or engine.
def _ExtractWithFilter(self, filter_file_path, destination_path):
"""Extracts files using a filter expression.
This method runs the file extraction process on the image and
potentially on every VSS if that is wanted.
Args:
filter_file_path: The path of the file that contains the filter
expressions.
destination_path: The path where the extracted files should be stored.
"""
# TODO: add support to handle multiple partitions.
self._source_path_spec = self.GetSourcePathSpec()
searcher = self._GetSourceFileSystemSearcher(
resolver_context=self._resolver_context)
if self._knowledge_base is None:
self._Preprocess(searcher)
if not os.path.isdir(destination_path):
os.makedirs(destination_path)
find_specs = engine_utils.BuildFindSpecsFromFile(
filter_file_path, pre_obj=self._knowledge_base.pre_obj)
# Save the regular files.
FileSaver.calc_md5 = self._remove_duplicates
for path_spec in searcher.Find(find_specs=find_specs):
FileSaver.WriteFile(path_spec, destination_path)
if self._process_vss and self._vss_stores:
volume_path_spec = self._source_path_spec.parent
logging.info(u'Extracting files from VSS.')
vss_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location=u'/',
parent=volume_path_spec)
vss_file_entry = path_spec_resolver.Resolver.OpenFileEntry(vss_path_spec)
number_of_vss = vss_file_entry.number_of_sub_file_entries
      # In plaso, 1 represents the first store index, while in dfvfs and
      # pyvshadow 0 represents the first store index, so 1 is subtracted.
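      # Editor's note: for example, user-supplied stores [1, 2] become
      # dfvfs store indexes [0, 1].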
vss_store_range = [store_nr - 1 for store_nr in self._vss_stores]
for store_index in vss_store_range:
logging.info(u'Extracting files from VSS {0:d} out of {1:d}'.format(
store_index + 1, number_of_vss))
vss_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_VSHADOW, store_index=store_index,
parent=volume_path_spec)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=vss_path_spec)
filename_prefix = 'vss_{0:d}'.format(store_index)
file_system = path_spec_resolver.Resolver.OpenFileSystem(
path_spec, resolver_context=self._resolver_context)
searcher = file_system_searcher.FileSystemSearcher(
file_system, vss_path_spec)
for path_spec in searcher.Find(find_specs=find_specs):
FileSaver.WriteFile(
path_spec, destination_path, filename_prefix=filename_prefix)
# TODO: refactor, this is a duplicate of the function in engine.
def _GetSourceFileSystemSearcher(self, resolver_context=None):
"""Retrieves the file system searcher of the source.
Args:
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
The file system searcher object (instance of dfvfs.FileSystemSearcher).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
file_system = path_spec_resolver.Resolver.OpenFileSystem(
self._source_path_spec, resolver_context=resolver_context)
type_indicator = self._source_path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
mount_point = self._source_path_spec
else:
mount_point = self._source_path_spec.parent
return file_system_searcher.FileSystemSearcher(file_system, mount_point)
def _Preprocess(self, searcher):
"""Preprocesses the image.
Args:
searcher: The file system searcher object (instance of
dfvfs.FileSystemSearcher).
"""
if self._knowledge_base is not None:
return
self._knowledge_base = knowledge_base.KnowledgeBase()
logging.info(u'Guessing OS')
platform = preprocess_interface.GuessOS(searcher)
logging.info(u'OS: {0:s}'.format(platform))
logging.info(u'Running preprocess.')
preprocess_manager.PreprocessPluginsManager.RunPlugins(
platform, searcher, self._knowledge_base)
logging.info(u'Preprocess done, saving files from image.')
def ParseOptions(self, options, source_option='source'):
"""Parses the options and initializes the front-end.
Args:
options: the command line arguments (instance of argparse.Namespace).
source_option: optional name of the source option. The default is source.
Raises:
BadConfigOption: if the options are invalid.
"""
super(ImageExportFrontend, self).ParseOptions(
options, source_option=source_option)
filter_file = getattr(options, 'filter', None)
if not filter_file and not getattr(options, 'extension_string', None):
raise errors.BadConfigOption(
          u'Neither an extension string nor a filter is defined.')
if filter_file and not os.path.isfile(filter_file):
raise errors.BadConfigOption(
u'Unable to proceed, filter file: {0:s} does not exist.'.format(
filter_file))
if (getattr(options, 'no_vss', False) or
getattr(options, 'include_duplicates', False)):
self._remove_duplicates = False
# Process date filter.
date_filters = getattr(options, 'date_filters', [])
if date_filters:
date_filter_object = DateFilter()
for date_filter in date_filters:
date_filter_pieces = date_filter.split(',')
if len(date_filter_pieces) != 3:
raise errors.BadConfigOption(
u'Date filter badly formed: {0:s}'.format(date_filter))
filter_type, filter_start, filter_end = date_filter_pieces
date_filter_object.Add(
filter_type=filter_type.strip(), filter_start=filter_start.strip(),
filter_end=filter_end.strip())
# TODO: Move the date filter to the front-end as an attribute.
FileSaver.SetDateFilter(date_filter_object)
def ProcessSource(self, options):
"""Processes the source.
Args:
options: the command line arguments (instance of argparse.Namespace).
Raises:
SourceScannerError: if the source scanner could not find a supported
file system.
UserAbort: if the user initiated an abort.
"""
self.ScanSource(options)
filter_file = getattr(options, 'filter', None)
if filter_file:
self._ExtractWithFilter(filter_file, options.path)
extension_string = getattr(options, 'extension_string', None)
if extension_string:
extensions = [x.strip() for x in extension_string.split(',')]
self._ExtractWithExtensions(extensions, options.path)
logging.info(u'Files based on extension extracted.')
def Main():
"""The main function, running the show."""
front_end = ImageExportFrontend()
arg_parser = argparse.ArgumentParser(
description=(
'This is a simple collector designed to export files inside an '
'image, both within a regular RAW image as well as inside a VSS. '
'The tool uses a collection filter that uses the same syntax as a '
'targeted plaso filter.'),
epilog='And that\'s how you export files, plaso style.')
arg_parser.add_argument(
'-d', '--debug', dest='debug', action='store_true', default=False,
help='Turn on debugging information.')
arg_parser.add_argument(
'-w', '--write', dest='path', action='store', default='.', type=str,
      help='The directory in which extracted files should be stored.')
arg_parser.add_argument(
'-x', '--extensions', dest='extension_string', action='store',
type=str, metavar='EXTENSION_STRING', help=(
          'If the purpose is to find all files given a certain extension '
          'this option should be used. This option accepts a comma separated '
          'string denoting all file extensions, eg: -x "csv,docx,pst".'))
arg_parser.add_argument(
'-f', '--filter', action='store', dest='filter', metavar='FILTER_FILE',
type=str, help=(
'Full path to the file that contains the collection filter, '
          'the file can use variables that are defined in preprocessing, '
'just like any other log2timeline/plaso collection filter.'))
arg_parser.add_argument(
'--date-filter', '--date_filter', action='append', type=str,
dest='date_filters', metavar="TYPE_START_END", default=None, help=(
'Add a date based filter to the export criteria. If a date based '
'filter is set no file is saved unless it\'s within the date '
'boundary. This parameter should be in the form of "TYPE,START,END" '
'where TYPE defines which timestamp this date filter affects, eg: '
'atime, ctime, crtime, bkup, etc. START defines the start date and '
'time of the boundary and END defines the end time. Both timestamps '
'are optional and should be set as - if not needed. The correct form '
'of the timestamp value is in the form of "YYYY-MM-DD HH:MM:SS" or '
'"YYYY-MM-DD". Examples are "atime, 2013-01-01 23:12:14, 2013-02-23" '
'This parameter can be repeated as needed to add additional date '
'date boundaries, eg: once for atime, once for crtime, etc.'))
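  # Editor's note: a hypothetical invocation combining the options above,
  # using the date range exercised by the accompanying tests:
  #
  #   image_export.py -w export_dir \
  #       --date-filter "ctime,2012-05-25 15:59:00,2012-05-25 15:59:20" \
  #       -x "txt" image.qcow2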
arg_parser.add_argument(
'--include_duplicates', dest='include_duplicates', action='store_true',
default=False, help=(
'By default if VSS is turned on all files saved will have their '
'MD5 sum calculated and compared to other files already saved '
'with the same inode value. If the MD5 sum is the same the file '
'does not get saved again. This option turns off that behavior '
'so that all files will get stored, even if they are duplicates.'))
front_end.AddImageOptions(arg_parser)
front_end.AddVssProcessingOptions(arg_parser)
arg_parser.add_argument(
'image', action='store', metavar='IMAGE', default=None, type=str, help=(
'The full path to the image file that we are about to extract files '
'from, it should be a raw image or another image that plaso '
'supports.'))
options = arg_parser.parse_args()
format_str = u'%(asctime)s [%(levelname)s] %(message)s'
if options.debug:
logging.basicConfig(level=logging.DEBUG, format=format_str)
else:
logging.basicConfig(level=logging.INFO, format=format_str)
try:
front_end.ParseOptions(options, source_option='image')
except errors.BadConfigOption as exception:
arg_parser.print_help()
print u''
logging.error(u'{0:s}'.format(exception))
return False
try:
front_end.ProcessSource(options)
logging.info(u'Processing completed.')
except (KeyboardInterrupt, errors.UserAbort):
logging.warning(u'Aborted by user.')
return False
except errors.SourceScannerError as exception:
logging.warning((
u'Unable to scan for a supported filesystem with error: {0:s}\n'
u'Most likely the image format is not supported by the '
u'tool.').format(exception))
return False
return True
if __name__ == '__main__':
if not Main():
sys.exit(1)
else:
sys.exit(0)
@@ -0,0 +1,237 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the image export front-end."""
import glob
import os
import shutil
import tempfile
import unittest
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.frontend import image_export
from plaso.frontend import test_lib
from plaso.lib import errors
class ImageExportFrontendTest(test_lib.FrontendTestCase):
"""Tests for the image export front-end."""
def setUp(self):
"""Sets up the objects used throughout the test."""
self._temp_directory = tempfile.mkdtemp()
def tearDown(self):
"""Cleans up the objects used throughout the test."""
shutil.rmtree(self._temp_directory, True)
def testProcessSourceExtractWithDateFilter(self):
"""Tests extract with file filter and date filter functionality."""
test_front_end = image_export.ImageExportFrontend()
options = test_lib.Options()
options.image = self._GetTestFilePath([u'image.qcow2'])
options.path = self._temp_directory
options.include_duplicates = True
options.filter = os.path.join(self._temp_directory, u'filter.txt')
with open(options.filter, 'wb') as file_object:
file_object.write('/a_directory/.+_file\n')
test_front_end.ParseOptions(options, source_option='image')
# Set the date filter.
filter_start = '2012-05-25 15:59:00'
filter_end = '2012-05-25 15:59:20'
date_filter_object = image_export.DateFilter()
date_filter_object.Add(
filter_start=filter_start, filter_end=filter_end,
filter_type='ctime')
image_export.FileSaver.SetDateFilter(date_filter_object)
test_front_end.ProcessSource(options)
expected_text_files = sorted([
os.path.join(self._temp_directory, u'a_directory', u'a_file')])
text_files = glob.glob(os.path.join(
self._temp_directory, u'a_directory', u'*'))
self.assertEquals(sorted(text_files), expected_text_files)
# We need to reset the date filter to not affect other tests.
# pylint: disable=protected-access
# TODO: Remove this once filtering has been moved to the front end object.
image_export.FileSaver._date_filter = None
def testProcessSourceExtractWithExtensions(self):
"""Tests extract with extensions process source functionality."""
test_front_end = image_export.ImageExportFrontend()
options = test_lib.Options()
options.image = self._GetTestFilePath([u'image.qcow2'])
options.path = self._temp_directory
options.extension_string = u'txt'
test_front_end.ParseOptions(options, source_option='image')
test_front_end.ProcessSource(options)
expected_text_files = sorted([
os.path.join(self._temp_directory, u'passwords.txt')])
text_files = glob.glob(os.path.join(self._temp_directory, u'*'))
self.assertEquals(sorted(text_files), expected_text_files)
def testProcessSourceExtractWithFilter(self):
"""Tests extract with filter process source functionality."""
test_front_end = image_export.ImageExportFrontend()
options = test_lib.Options()
options.image = self._GetTestFilePath([u'image.qcow2'])
options.path = self._temp_directory
options.filter = os.path.join(self._temp_directory, u'filter.txt')
with open(options.filter, 'wb') as file_object:
file_object.write('/a_directory/.+_file\n')
test_front_end.ParseOptions(options, source_option='image')
test_front_end.ProcessSource(options)
expected_text_files = sorted([
os.path.join(self._temp_directory, u'a_directory', u'another_file'),
os.path.join(self._temp_directory, u'a_directory', u'a_file')])
text_files = glob.glob(os.path.join(
self._temp_directory, u'a_directory', u'*'))
self.assertEquals(sorted(text_files), expected_text_files)
def testDateFilter(self):
"""Test the save file based on date filter function."""
# Open up a file entry.
path = self._GetTestFilePath([u'ímynd.dd'])
os_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=path)
tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TSK, inode=16,
location=u'/a_directory/another_file', parent=os_path_spec)
file_entry = path_spec_resolver.Resolver.OpenFileEntry(tsk_path_spec)
# Timestamps of file:
# Modified: 2012-05-25T15:59:23+00:00
# Accessed: 2012-05-25T15:59:23+00:00
# Created: 2012-05-25T15:59:23+00:00
# Create the date filter object.
date_filter = image_export.DateFilter()
# No date filter set
self.assertTrue(
date_filter.CompareFileEntry(file_entry))
# Add a date to the date filter.
date_filter.Add(
filter_start='2012-05-25 15:59:20', filter_end='2012-05-25 15:59:25',
filter_type='ctime')
self.assertTrue(date_filter.CompareFileEntry(file_entry))
date_filter.Reset()
date_filter.Add(
filter_start='2012-05-25 15:59:24', filter_end='2012-05-25 15:59:55',
filter_type='ctime')
self.assertFalse(date_filter.CompareFileEntry(file_entry))
date_filter.Reset()
# Testing a timestamp that does not exist in the stat object.
date_filter.Add(filter_type='bkup', filter_start='2012-02-02 12:12:12')
with self.assertRaises(errors.WrongFilterOption):
date_filter.CompareFileEntry(file_entry)
    # Testing adding a badly formed filter.
with self.assertRaises(errors.WrongFilterOption):
date_filter.Add(filter_type='foobar', filter_start='2012-02-01 01:01:01')
date_filter.Reset()
    # Testing adding a badly formed filter, no date set.
with self.assertRaises(errors.WrongFilterOption):
date_filter.Add(filter_type='atime')
date_filter.Reset()
# Just end date set.
date_filter.Add(
filter_end='2012-05-25 15:59:55', filter_type='mtime')
self.assertTrue(date_filter.CompareFileEntry(file_entry))
date_filter.Reset()
# Just with a start date but within range.
date_filter.Add(
filter_start='2012-03-25 15:59:55', filter_type='atime')
self.assertTrue(date_filter.CompareFileEntry(file_entry))
date_filter.Reset()
# And now with a start date, but out of range.
date_filter.Add(
filter_start='2012-05-25 15:59:55', filter_type='ctime')
self.assertFalse(date_filter.CompareFileEntry(file_entry))
date_filter.Reset()
# Test with more than one date filter.
date_filter.Add(
filter_start='2012-05-25 15:59:55', filter_type='ctime',
filter_end='2012-05-25 17:34:12')
date_filter.Add(
filter_start='2012-05-25 15:59:20', filter_end='2012-05-25 15:59:25',
filter_type='atime')
date_filter.Add(
filter_start='2012-05-25 15:59:24', filter_end='2012-05-25 15:59:55',
filter_type='mtime')
self.assertFalse(date_filter.CompareFileEntry(file_entry))
self.assertEquals(date_filter.number_of_filters, 3)
# Remove a filter.
date_filter.Remove(
filter_start='2012-05-25 15:59:55', filter_type='ctime',
filter_end='2012-05-25 17:34:12')
self.assertEquals(date_filter.number_of_filters, 2)
# Remove a date filter that does not exist.
date_filter.Remove(
filter_start='2012-05-25 11:59:55', filter_type='ctime',
filter_end='2012-05-25 17:34:12')
self.assertEquals(date_filter.number_of_filters, 2)
date_filter.Add(
filter_end='2012-05-25 15:59:25', filter_type='atime')
self.assertEquals(date_filter.number_of_filters, 3)
date_filter.Remove(
filter_end='2012-05-25 15:59:25', filter_type='atime')
self.assertEquals(date_filter.number_of_filters, 2)
date_filter.Reset()
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,454 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The log2timeline front-end."""
import argparse
import logging
import multiprocessing
import sys
import time
import textwrap
import plaso
# Registering output modules so that output bypass works.
from plaso import output as _ # pylint: disable=unused-import
from plaso.frontend import frontend
from plaso.frontend import utils as frontend_utils
from plaso.lib import errors
from plaso.parsers import manager as parsers_manager
import pytz
class LoggingFilter(logging.Filter):
"""Class that implements basic filtering of log events for plaso.
  Some libraries, like binplist, introduce excessive amounts of
  logging that clutter up the debug logs of plaso, making them
  almost unusable. This class implements a filter designed to make
  the debug logs more clutter-free.
"""
def filter(self, record):
"""Filter messages sent to the logging infrastructure."""
if record.module == 'binplist' and record.levelno == logging.DEBUG:
return False
return True
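# Editor's note: Main() below installs this filter on the root logger when
# debug logging is enabled, roughly:
#
#   root_logger = logging.getLogger()
#   root_logger.addFilter(LoggingFilter())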
class Log2TimelineFrontend(frontend.ExtractionFrontend):
"""Class that implements the log2timeline front-end."""
_BYTES_IN_A_MIB = 1024 * 1024
def __init__(self):
"""Initializes the front-end object."""
input_reader = frontend.StdinFrontendInputReader()
output_writer = frontend.StdoutFrontendOutputWriter()
super(Log2TimelineFrontend, self).__init__(input_reader, output_writer)
def _GetPluginData(self):
"""Return a dict object with a list of all available parsers and plugins."""
return_dict = {}
    # Import all plugins and parsers to print out the necessary information.
    # These are not imported at the top since they are only required if this
    # parameter is set; otherwise these libraries get imported in their
    # respective locations.
    # The reason why some of these libraries are imported as '_' is to make
    # sure all appropriate parsers and plugins are registered; we don't need
    # to call these libraries directly, loading them is enough to get them
    # registered.
    # TODO: remove this hack; imports should be at the top. If this does not
    # work, remove the need for implicit behavior on import.
from plaso import filters
from plaso import parsers as _
from plaso import output as _
from plaso.frontend import presets
from plaso.lib import output
return_dict['Versions'] = [
('plaso engine', plaso.GetVersion()),
('python', sys.version)]
return_dict['Parsers'] = []
for _, parser_class in parsers_manager.ParsersManager.GetParsers():
description = getattr(parser_class, 'DESCRIPTION', u'')
return_dict['Parsers'].append((parser_class.NAME, description))
return_dict['Parser Lists'] = []
for category, parsers in sorted(presets.categories.items()):
return_dict['Parser Lists'].append((category, ', '.join(parsers)))
return_dict['Output Modules'] = []
for name, description in sorted(output.ListOutputFormatters()):
return_dict['Output Modules'].append((name, description))
return_dict['Plugins'] = []
for _, parser_class in parsers_manager.ParsersManager.GetParsers():
if parser_class.SupportsPlugins():
for _, plugin_class in parser_class.GetPlugins():
description = getattr(plugin_class, 'DESCRIPTION', u'')
return_dict['Plugins'].append((plugin_class.NAME, description))
return_dict['Filters'] = []
for filter_obj in sorted(filters.ListFilters()):
doc_string, _, _ = filter_obj.__doc__.partition('\n')
return_dict['Filters'].append((filter_obj.filter_name, doc_string))
return return_dict
def _GetTimeZones(self):
"""Returns a generator of the names of all the supported time zones."""
yield 'local'
for zone in pytz.all_timezones:
yield zone
def ListPluginInformation(self):
"""Lists all plugin and parser information."""
plugin_list = self._GetPluginData()
return_string_pieces = []
return_string_pieces.append(
u'{:=^80}'.format(u' log2timeline/plaso information. '))
for header, data in plugin_list.items():
# TODO: Using the frontend utils here instead of "self.PrintHeader"
# since the desired output here is a string that can be sent later
# to an output writer. Change this entire function so it can utilize
# PrintHeader or something similar.
return_string_pieces.append(frontend_utils.FormatHeader(header))
for entry_header, entry_data in sorted(data):
return_string_pieces.append(
frontend_utils.FormatOutputString(entry_header, entry_data))
return_string_pieces.append(u'')
self._output_writer.Write(u'\n'.join(return_string_pieces))
def ListTimeZones(self):
"""Lists the time zones."""
self._output_writer.Write(u'=' * 40)
self._output_writer.Write(u' ZONES')
self._output_writer.Write(u'-' * 40)
for timezone in self._GetTimeZones():
self._output_writer.Write(u' {0:s}'.format(timezone))
self._output_writer.Write(u'=' * 40)
def Main():
"""Start the tool."""
multiprocessing.freeze_support()
front_end = Log2TimelineFrontend()
epilog = u'\n'.join([
u'',
u'Example usage:',
u'',
u'Run the tool against an image (full kitchen sink)',
u' log2timeline.py /cases/mycase/plaso.dump ímynd.dd',
u'',
u'Instead of answering questions, indicate some of the options on the',
u'command line (including data from particular VSS stores).',
(u' log2timeline.py -o 63 --vss_stores 1,2 /cases/plaso_vss.dump '
u'image.E01'),
u'',
u'And that\'s how you build a timeline using log2timeline...',
u''])
description = u'\n'.join([
u'',
u'log2timeline is the main front-end to the plaso back-end, used to',
u'collect and correlate events extracted from a filesystem.',
u'',
u'More information can be gathered from here:',
u' http://plaso.kiddaland.net/usage/log2timeline',
u''])
arg_parser = argparse.ArgumentParser(
description=textwrap.dedent(description),
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=textwrap.dedent(epilog), add_help=False)
# Create few argument groups to make formatting help messages clearer.
info_group = arg_parser.add_argument_group('Informational Arguments')
function_group = arg_parser.add_argument_group('Functional Arguments')
deep_group = arg_parser.add_argument_group('Deep Analysis Arguments')
performance_group = arg_parser.add_argument_group('Performance Arguments')
function_group.add_argument(
'-z', '--zone', '--timezone', dest='timezone', action='store', type=str,
default='UTC', help=(
          u'Define the timezone of the IMAGE (not the output). This is '
          u'usually discovered automatically by preprocessing but might '
          u'need to be set manually if preprocessing does not detect it '
          u'properly, or to override the detected time zone.'))
function_group.add_argument(
'-t', '--text', dest='text_prepend', action='store', type=unicode,
default=u'', metavar='TEXT', help=(
u'Define a free form text string that is prepended to each path '
u'to make it easier to distinguish one record from another in a '
u'timeline (like c:\\, or host_w_c:\\)'))
function_group.add_argument(
'--parsers', dest='parsers', type=str, action='store', default='',
metavar='PARSER_LIST', help=(
u'Define a list of parsers to use by the tool. This is a comma '
u'separated list where each entry can be either a name of a parser '
u'or a parser list. Each entry can be prepended with a minus sign '
u'to negate the selection (exclude it). The list match is an '
u'exact match while an individual parser matching is a case '
u'insensitive substring match, with support for glob patterns. '
u'Examples would be: "reg" that matches the substring "reg" in '
u'all parser names or the glob pattern "sky[pd]" that would match '
          u'all parsers that have the string "skyp" or "skyd" in its name. '
u'All matching is case insensitive.'))
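  # Editor's note: hypothetical examples of the syntax described above:
  #   --parsers "reg"       every parser whose name contains "reg"
  #   --parsers "sky[pd]"   glob match for names with "skyp" or "skyd"
  #   --parsers "-reg"      excludes the matching parsers instead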
info_group.add_argument(
'-h', '--help', action='help', help=u'Show this help message and exit.')
info_group.add_argument(
'--logfile', action='store', metavar='FILENAME', dest='logfile',
type=unicode, default=u'', help=(
u'If defined all log messages will be redirected to this file '
          u'instead of the default STDERR.'))
function_group.add_argument(
'-p', '--preprocess', dest='preprocess', action='store_true',
default=False, help=(
u'Turn on preprocessing. Preprocessing is turned on by default '
u'when parsing image files, however if a mount point is being '
u'parsed then this parameter needs to be set manually.'))
front_end.AddPerformanceOptions(performance_group)
performance_group.add_argument(
'--workers', dest='workers', action='store', type=int, default=0,
help=(u'The number of worker threads [defaults to available system '
            u'CPUs minus three].'))
# TODO: seems to be no longer used, remove.
# function_group.add_argument(
# '-i', '--image', dest='image', action='store_true', default=False,
# help=(
# 'Indicates that this is an image instead of a regular file. It is '
# 'not necessary to include this option if -o (offset) is used, then '
# 'this option is assumed. Use this when parsing an image with an '
# 'offset of zero.'))
front_end.AddVssProcessingOptions(deep_group)
performance_group.add_argument(
'--single_thread', '--single-thread', '--single_process',
'--single-process', dest='single_process', action='store_true',
default=False, help=(
u'Indicate that the tool should run in a single process.'))
function_group.add_argument(
'-f', '--file_filter', '--file-filter', dest='file_filter',
action='store', type=unicode, default=None, help=(
u'List of files to include for targeted collection of files to '
u'parse, one line per file path, setup is /path|file - where each '
u'element can contain either a variable set in the preprocessing '
u'stage or a regular expression.'))
deep_group.add_argument(
'--scan_archives', dest='open_files', action='store_true', default=False,
help=argparse.SUPPRESS)
# This option is "hidden" for the time being, still left in there for testing
# purposes, but hidden from the tool usage and help messages.
# help=('Indicate that the tool should try to open files to extract embedd'
# 'ed files within them, for instance to extract files from compress'
# 'ed containers, etc. Be AWARE THAT THIS IS EXTREMELY SLOW.'))
front_end.AddImageOptions(function_group)
function_group.add_argument(
'--partition', dest='partition_number', action='store', type=int,
default=None, help=(
u'Choose a partition number from a disk image. This partition '
          u'number should correspond to the partition number on the disk '
u'image, starting from partition 1.'))
# Build the version information.
version_string = u'log2timeline - plaso back-end {0:s}'.format(
plaso.GetVersion())
info_group.add_argument(
'-v', '--version', action='version', version=version_string,
help=u'Show the current version of the back-end.')
info_group.add_argument(
'--info', dest='show_info', action='store_true', default=False,
help=u'Print out information about supported plugins and parsers.')
info_group.add_argument(
'--show_memory_usage', '--show-memory-usage', action='store_true',
default=False, dest='foreman_verbose', help=(
u'Indicates that basic memory usage should be included in the '
u'output of the process monitor. If this option is not set the '
u'tool only displays basic status and counter information.'))
info_group.add_argument(
'--disable_worker_monitor', '--disable-worker-monitor',
action='store_false', default=True, dest='foreman_enabled', help=(
u'Turn off the foreman. The foreman monitors all worker processes '
          u'and periodically prints out information about all running workers. '
u'By default the foreman is run, but it can be turned off using this '
u'parameter.'))
front_end.AddExtractionOptions(function_group)
function_group.add_argument(
'--output', dest='output_module', action='store', type=unicode,
default='', help=(
u'Bypass the storage module directly storing events according to '
u'the output module. This means that the output will not be in the '
u'pstorage format but in the format chosen by the output module. '
          u'[Please note this feature is EXPERIMENTAL at this time, use at '
u'own risk (eg. sqlite output does not yet work)]'))
function_group.add_argument(
'--serializer-format', '--serializer_format', dest='serializer_format',
action='store', default='proto', metavar='FORMAT', help=(
u'By default the storage uses protobufs for serializing event '
u'objects. This parameter can be used to change that behavior. '
u'The choices are "proto" and "json".'))
front_end.AddInformationalOptions(info_group)
arg_parser.add_argument(
'output', action='store', metavar='STORAGE_FILE', nargs='?',
type=unicode, help=(
        u'The path to the output file; if the file exists it will get '
        u'appended to.'))
arg_parser.add_argument(
'source', action='store', metavar='SOURCE',
nargs='?', type=unicode, help=(
u'The path to the source device, file or directory. If the source is '
u'a supported storage media device or image file, archive file or '
u'a directory, the files within are processed recursively.'))
arg_parser.add_argument(
'filter', action='store', metavar='FILTER', nargs='?', default=None,
type=unicode, help=(
u'A filter that can be used to filter the dataset before it '
u'is written into storage. More information about the filters '
        u'and their usage can be found here: http://plaso.kiddaland.'
u'net/usage/filters'))
# Properly prepare the attributes according to local encoding.
if front_end.preferred_encoding == 'ascii':
logging.warning(
u'The preferred encoding of your system is ASCII, which is not optimal '
u'for the typically non-ASCII characters that need to be parsed and '
u'processed. The tool will most likely crash and die, perhaps in a way '
u'that may not be recoverable. A five second delay is introduced to '
u'give you time to cancel the runtime and reconfigure your preferred '
u'encoding, otherwise continue at own risk.')
time.sleep(5)
u_argv = [x.decode(front_end.preferred_encoding) for x in sys.argv]
sys.argv = u_argv
try:
options = arg_parser.parse_args()
except UnicodeEncodeError:
# If we get here we are attempting to print help in a "dumb" terminal.
print arg_parser.format_help().encode(front_end.preferred_encoding)
return False
if options.timezone == 'list':
front_end.ListTimeZones()
return True
if options.show_info:
front_end.ListPluginInformation()
return True
format_str = (
u'%(asctime)s [%(levelname)s] (%(processName)-10s) PID:%(process)d '
u'<%(module)s> %(message)s')
if options.debug:
if options.logfile:
logging.basicConfig(
level=logging.DEBUG, format=format_str, filename=options.logfile)
else:
logging.basicConfig(level=logging.DEBUG, format=format_str)
logging_filter = LoggingFilter()
root_logger = logging.getLogger()
root_logger.addFilter(logging_filter)
elif options.logfile:
logging.basicConfig(
level=logging.INFO, format=format_str, filename=options.logfile)
else:
logging.basicConfig(level=logging.INFO, format=format_str)
if not options.output:
arg_parser.print_help()
print u''
arg_parser.print_usage()
print u''
logging.error(u'Wrong usage: need to define an output.')
return False
try:
front_end.ParseOptions(options)
front_end.SetStorageFile(options.output)
except errors.BadConfigOption as exception:
arg_parser.print_help()
print u''
logging.error(u'{0:s}'.format(exception))
return False
# Configure the foreman (monitors workers).
front_end.SetShowMemoryInformation(show_memory=options.foreman_verbose)
front_end.SetRunForeman(run_foreman=options.foreman_enabled)
try:
front_end.ProcessSource(options)
logging.info(u'Processing completed.')
except (KeyboardInterrupt, errors.UserAbort):
logging.warning(u'Aborted by user.')
return False
except errors.SourceScannerError as exception:
logging.warning((
u'Unable to scan for a supported filesystem with error: {0:s}\n'
u'Most likely the image format is not supported by the '
u'tool.').format(exception))
return False
return True
if __name__ == '__main__':
if not Main():
sys.exit(1)
else:
sys.exit(0)
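# Editor's note: a hypothetical end-to-end invocation combining the options
# defined above, in the spirit of the epilog examples:
#
#   log2timeline.py --partition 2 --vss_stores 1,2 -z UTC \
#       /cases/mycase/plaso.dump image.E01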
@@ -0,0 +1,75 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the log2timeline front-end."""
import os
import shutil
import tempfile
import unittest
from plaso.frontend import log2timeline
from plaso.frontend import test_lib
from plaso.lib import pfilter
from plaso.lib import storage
class Log2TimelineFrontendTest(test_lib.FrontendTestCase):
"""Tests for the log2timeline front-end."""
def setUp(self):
"""Sets up the objects used throughout the test."""
# This is necessary since TimeRangeCache uses class members.
    # TODO: remove this workaround and properly fix TimeRangeCache.
pfilter.TimeRangeCache.ResetTimeConstraints()
self._temp_directory = tempfile.mkdtemp()
def tearDown(self):
"""Cleans up the objects used throughout the test."""
shutil.rmtree(self._temp_directory, True)
def testGetStorageInformation(self):
"""Tests the get storage information function."""
test_front_end = log2timeline.Log2TimelineFrontend()
options = test_lib.Options()
options.source = self._GetTestFilePath([u'ímynd.dd'])
storage_file_path = os.path.join(self._temp_directory, u'plaso.db')
test_front_end.ParseOptions(options)
test_front_end.SetStorageFile(storage_file_path=storage_file_path)
test_front_end.SetRunForeman(run_foreman=False)
test_front_end.ProcessSource(options)
try:
storage_file = storage.StorageFile(storage_file_path, read_only=True)
    except IOError:
      # Not a valid storage file, fail the test.
      self.fail(u'Unable to open the storage file.')
# Make sure we can read an event out of the storage.
event_object = storage_file.GetSortedEntry()
self.assertIsNotNone(event_object)
# TODO: add more tests that cover more of the functionality of the frontend.
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,266 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A simple dump information gathered from a plaso storage container.
pinfo stands for Plaso INniheldurFleiriOrd or plaso contains more words.
"""
# TODO: To make YAML loading work.
import argparse
import logging
import pprint
import sys
from plaso.frontend import frontend
from plaso.lib import errors
from plaso.lib import timelib
class PinfoFrontend(frontend.AnalysisFrontend):
"""Class that implements the pinfo front-end."""
def __init__(self):
"""Initializes the front-end object."""
input_reader = frontend.StdinFrontendInputReader()
output_writer = frontend.StdoutFrontendOutputWriter()
super(PinfoFrontend, self).__init__(input_reader, output_writer)
self._printer = pprint.PrettyPrinter(indent=8)
self._verbose = False
def _AddCollectionInformation(self, lines_of_text, collection_information):
"""Adds the lines of text that make up the collection information.
Args:
lines_of_text: A list containing the lines of text.
collection_information: The collection information dict.
"""
filename = collection_information.get('file_processed', 'N/A')
time_of_run = collection_information.get('time_of_run', 0)
time_of_run = timelib.Timestamp.CopyToIsoFormat(time_of_run)
lines_of_text.append(u'Storage file:\t\t{0:s}'.format(
self._storage_file_path))
lines_of_text.append(u'Source processed:\t{0:s}'.format(filename))
lines_of_text.append(u'Time of processing:\t{0:s}'.format(time_of_run))
lines_of_text.append(u'')
lines_of_text.append(u'Collection information:')
for key, value in collection_information.items():
if key not in ['file_processed', 'time_of_run']:
lines_of_text.append(u'\t{0:s} = {1!s}'.format(key, value))
def _AddCounterInformation(
self, lines_of_text, description, counter_information):
"""Adds the lines of text that make up the counter information.
Args:
lines_of_text: A list containing the lines of text.
description: The counter information description.
counter_information: The counter information dict.
"""
lines_of_text.append(u'')
lines_of_text.append(u'{0:s}:'.format(description))
for key, value in counter_information.most_common():
lines_of_text.append(u'\tCounter: {0:s} = {1:d}'.format(key, value))
def _AddHeader(self, lines_of_text):
"""Adds the lines of text that make up the header.
Args:
lines_of_text: A list containing the lines of text.
"""
lines_of_text.append(u'-' * self._LINE_LENGTH)
lines_of_text.append(u'\t\tPlaso Storage Information')
lines_of_text.append(u'-' * self._LINE_LENGTH)
def _AddStoreInformation(self, lines_of_text, store_information):
"""Adds the lines of text that make up the store information.
Args:
lines_of_text: A list containing the lines of text.
store_information: The store information dict.
"""
lines_of_text.append(u'')
lines_of_text.append(u'Store information:')
lines_of_text.append(u'\tNumber of available stores: {0:d}'.format(
store_information['Number']))
if not self._verbose:
lines_of_text.append(
u'\tStore information details omitted (to see use: --verbose)')
else:
for key, value in store_information.iteritems():
if key not in ['Number']:
lines_of_text.append(
u'\t{0:s} =\n{1!s}'.format(key, self._printer.pformat(value)))
def _FormatStorageInformation(self, info, storage_file, last_entry=False):
"""Formats the storage information.
Args:
info: The storage information object (instance of PreprocessObject).
storage_file: The storage file (instance of StorageFile).
last_entry: Optional boolean value to indicate this is the last
information entry. The default is False.
Returns:
A string containing the formatted storage information.
"""
lines_of_text = []
collection_information = getattr(info, 'collection_information', None)
if collection_information:
self._AddHeader(lines_of_text)
self._AddCollectionInformation(lines_of_text, collection_information)
else:
lines_of_text.append(u'Missing collection information.')
counter_information = getattr(info, 'counter', None)
if counter_information:
self._AddCounterInformation(
lines_of_text, u'Parser counter information', counter_information)
counter_information = getattr(info, 'plugin_counter', None)
if counter_information:
self._AddCounterInformation(
lines_of_text, u'Plugin counter information', counter_information)
store_information = getattr(info, 'stores', None)
if store_information:
self._AddStoreInformation(lines_of_text, store_information)
information = u'\n'.join(lines_of_text)
if not self._verbose:
preprocessing = (
u'Preprocessing information omitted (to see use: --verbose).')
else:
preprocessing = u'Preprocessing information:\n'
for key, value in info.__dict__.items():
if key == 'collection_information':
continue
elif key == 'counter' or key == 'stores':
continue
if isinstance(value, list):
preprocessing += u'\t{0:s} =\n{1!s}\n'.format(
key, self._printer.pformat(value))
else:
preprocessing += u'\t{0:s} = {1!s}\n'.format(key, value)
if not last_entry:
reports = u''
elif storage_file.HasReports():
reports = u'Reporting information omitted (to see use: --verbose).'
else:
reports = u'No reports stored.'
if self._verbose and last_entry and storage_file.HasReports():
report_list = []
for report in storage_file.GetReports():
report_list.append(report.GetString())
reports = u'\n'.join(report_list)
return u'\n'.join([
information, u'', preprocessing, u'', reports, u'-+' * 40])
def GetStorageInformation(self):
"""Returns a formatted storage information generator."""
try:
storage_file = self.OpenStorageFile()
except IOError as exception:
logging.error(
u'Unable to open storage file: {0:s} with error: {1:s}'.format(
self._storage_file_path, exception))
return
list_of_storage_information = storage_file.GetStorageInformation()
if not list_of_storage_information:
yield ''
return
last_entry = False
for index, info in enumerate(list_of_storage_information):
if index + 1 == len(list_of_storage_information):
last_entry = True
yield self._FormatStorageInformation(
info, storage_file, last_entry=last_entry)
def ParseOptions(self, options):
"""Parses the options and initializes the front-end.
Args:
options: the command line arguments (instance of argparse.Namespace).
Raises:
BadConfigOption: if the options are invalid.
"""
super(PinfoFrontend, self).ParseOptions(options)
self._verbose = getattr(options, 'verbose', False)
def Main():
"""Start the tool."""
front_end = PinfoFrontend()
usage = """
Gives you information about the storage file, how it was
collected, what information was gained from the image, etc.
"""
arg_parser = argparse.ArgumentParser(description=usage)
format_str = '[%(levelname)s] %(message)s'
logging.basicConfig(level=logging.INFO, format=format_str)
arg_parser.add_argument(
'-v', '--verbose', dest='verbose', action='store_true', default=False,
help='Be extra verbose in the information printed out.')
front_end.AddStorageFileOptions(arg_parser)
options = arg_parser.parse_args()
try:
front_end.ParseOptions(options)
except errors.BadConfigOption as exception:
arg_parser.print_help()
print u''
logging.error(u'{0:s}'.format(exception))
return False
storage_information_found = False
for storage_information in front_end.GetStorageInformation():
storage_information_found = True
print storage_information.encode(front_end.preferred_encoding)
if not storage_information_found:
print u'No Plaso storage information found.'
return True
if __name__ == '__main__':
if not Main():
sys.exit(1)
else:
sys.exit(0)
@@ -0,0 +1,65 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for test pinfo front-end."""
import os
import unittest
from plaso.frontend import pinfo
from plaso.frontend import test_lib
class PinfoFrontendTest(test_lib.FrontendTestCase):
"""Tests for test pinfo front-end."""
def testGetStorageInformation(self):
"""Tests the get storage information function."""
test_front_end = pinfo.PinfoFrontend()
options = test_lib.Options()
options.storage_file = os.path.join(self._TEST_DATA_PATH, 'psort_test.out')
test_front_end.ParseOptions(options)
storage_information_list = list(test_front_end.GetStorageInformation())
self.assertEquals(len(storage_information_list), 1)
lines_of_text = storage_information_list[0].split(u'\n')
expected_line_of_text = u'-' * 80
self.assertEquals(lines_of_text[0], expected_line_of_text)
self.assertEquals(lines_of_text[2], expected_line_of_text)
self.assertEquals(lines_of_text[1], u'\t\tPlaso Storage Information')
expected_line_of_text = u'Storage file:\t\t{0:s}'.format(
options.storage_file)
self.assertEquals(lines_of_text[3], expected_line_of_text)
self.assertEquals(lines_of_text[4], u'Source processed:\tsyslog')
expected_line_of_text = u'Time of processing:\t2014-02-15T04:33:16+00:00'
self.assertEquals(lines_of_text[5], expected_line_of_text)
self.assertEquals(lines_of_text[6], u'')
self.assertEquals(lines_of_text[7], u'Collection information:')
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,832 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the plasm front-end to plaso."""
import argparse
import hashlib
import logging
import operator
import os
import pickle
import sets
import sys
import textwrap
from plaso import filters
from plaso.frontend import frontend
from plaso.lib import errors
from plaso.lib import event
from plaso.lib import output as output_lib
from plaso.lib import storage
from plaso.output import pstorage # pylint: disable=unused-import
class PlasmFrontend(frontend.AnalysisFrontend):
"""Class that implements the psort front-end."""
def __init__(self):
"""Initializes the front-end object."""
input_reader = frontend.StdinFrontendInputReader()
output_writer = frontend.StdoutFrontendOutputWriter()
super(PlasmFrontend, self).__init__(input_reader, output_writer)
self._cluster_closeness = None
self._cluster_threshold = None
self._quiet = False
self._tagging_file_path = None
self.mode = None
def ClusterEvents(self):
"""Clusters the event objects in the storage file."""
clustering_engine = ClusteringEngine(
self._storage_file_path, self._cluster_threshold,
self._cluster_closeness)
clustering_engine.Run()
def GroupEvents(self):
"""Groups the event objects in the storage file.
Raises:
RuntimeError: if a non-recoverable situation is encountered.
"""
if not self._quiet:
self._output_writer.Write(u'Grouping tagged events.\n')
try:
storage_file = self.OpenStorageFile(read_only=False)
except IOError as exception:
raise RuntimeError(
u'Unable to open storage file: {0:s} with error: {1:s}.'.format(
self._storage_file_path, exception))
grouping_engine = GroupingEngine()
grouping_engine.Run(storage_file, quiet=self._quiet)
storage_file.Close()
if not self._quiet:
self._output_writer.Write(u'Grouping DONE.\n')
def TagEvents(self):
"""Tags the event objects in the storage file."""
tagging_engine = TaggingEngine(
self._storage_file_path, self._tagging_file_path, quiet=self._quiet)
tagging_engine.Run()
def ParseOptions(self, options):
"""Parses the options and initializes the front-end.
Args:
options: the command line arguments (instance of argparse.Namespace).
Raises:
BadConfigOption: if the options are invalid.
"""
super(PlasmFrontend, self).ParseOptions(options)
self.mode = getattr(options, 'subcommand', None)
if not self.mode:
raise errors.BadConfigOption(u'Missing mode subcommand.')
if self.mode not in ['cluster', 'group', 'tag']:
raise errors.BadConfigOption(
u'Unsupported mode subcommand: {0:s}.'.format(self.mode))
if self.mode == 'cluster':
self._cluster_threshold = getattr(options, 'cluster_threshold', None)
if not self._cluster_threshold:
raise errors.BadConfigOption(u'Missing cluster threshold value.')
try:
        self._cluster_threshold = int(self._cluster_threshold)
except ValueError:
raise errors.BadConfigOption(u'Invalid cluster threshold value.')
self._cluster_closeness = getattr(options, 'cluster_closeness', None)
if not self._cluster_closeness:
raise errors.BadConfigOption(u'Missing cluster closeness value.')
try:
        self._cluster_closeness = int(self._cluster_closeness)
except ValueError:
raise errors.BadConfigOption(u'Invalid cluster closeness value.')
elif self.mode == 'tag':
tagging_file_path = getattr(options, 'tag_filename', None)
if not tagging_file_path:
raise errors.BadConfigOption(u'Missing tagging file path.')
      if not os.path.isfile(tagging_file_path):
        raise errors.BadConfigOption(
            u'No such tagging file: {0:s}'.format(tagging_file_path))
self._tagging_file_path = tagging_file_path
def SetupStorage(input_file_path, pre_obj=None):
"""Sets up the storage object.
  Attempts to initialize a storage file. If this fails on an IOError, for
  which a common cause is a typo in the path, an error is logged and the
  tool exits.
Args:
input_file_path: Filesystem path to the plaso storage container.
pre_obj: A plaso preprocessing object.
Returns:
A storage.StorageFile object.
"""
try:
return storage.StorageFile(
input_file_path, pre_obj=pre_obj, read_only=False)
  except IOError as exception:
    logging.error(u'IO ERROR: {0:s}'.format(exception))
    sys.exit(1)
def EventObjectGenerator(plaso_storage, quiet=False):
"""Yields EventObject objects.
  Yields event_objects out of a StorageFile object. Unless quiet is set,
  it also writes up to 80 '.'s to stdout to indicate progress.
Args:
plaso_storage: a storage.StorageFile object.
quiet: boolean value indicating whether to suppress progress output.
Yields:
EventObject objects.
"""
total_events = plaso_storage.GetNumberOfEvents()
if total_events > 0:
    # Guard against zero so the modulo below cannot divide by zero.
    events_per_dot = max(1, operator.floordiv(total_events, 80))
counter = 0
else:
quiet = True
event_object = plaso_storage.GetSortedEntry()
while event_object:
if not quiet:
counter += 1
if counter % events_per_dot == 0:
sys.stdout.write(u'.')
sys.stdout.flush()
yield event_object
event_object = plaso_storage.GetSortedEntry()
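# Illustrative usage sketch (not part of the original module); the storage
# path argument is hypothetical and any existing Plaso storage file works.
def _ExampleIterateStore(storage_path):
  """Shows the intended way to drive EventObjectGenerator."""
  with SetupStorage(storage_path) as store:
    for event_object in EventObjectGenerator(store, quiet=True):
      # Each yielded object is a plaso event.EventObject.
      print event_object.timestamp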
def ParseTaggingFile(tag_input):
"""Parses Tagging Input file.
Parses a tagging input file and returns a dictionary of tags, where each
key represents a tag and each entry is a list of plaso filters.
Args:
tag_input: filesystem path to the tagging input file.
Returns:
A dictionary whose keys are tags and values are EventObjectFilter objects.
"""
with open(tag_input, 'rb') as tag_input_file:
tags = {}
current_tag = u''
for line in tag_input_file:
line_rstrip = line.rstrip()
line_strip = line_rstrip.lstrip()
if not line_strip or line_strip.startswith(u'#'):
continue
if not line_rstrip[0].isspace():
current_tag = line_rstrip
tags[current_tag] = []
else:
if not current_tag:
continue
compiled_filter = filters.GetFilter(line_strip)
if compiled_filter:
if compiled_filter not in tags[current_tag]:
tags[current_tag].append(compiled_filter)
else:
logging.warning(u'Tag "{0:s}" contains invalid filter: {1:s}'.format(
current_tag, line_strip))
return tags
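# Illustrative sketch of the tagging input format parsed above (not part of
# the original module); the tag name and condition are hypothetical:
#
#   Obvious Malware                    <- an unindented line starts a tag
#       filename contains 'malware'    <- indented lines are conditions
#
# For such a file ParseTaggingFile() returns a dictionary along the lines of
# {u'Obvious Malware': [<compiled filter>]}, with each condition compiled
# through plaso.filters.GetFilter().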
class TaggingEngine(object):
"""Class that defines a tagging engine."""
def __init__(self, target_filename, tag_input, quiet=False):
"""Initializes the tagging engine object.
Args:
target_filename: filename for a Plaso storage file to be tagged.
tag_input: filesystem path to the tagging input file.
quiet: Optional boolean value to indicate the progress output should
be suppressed. The default is False.
"""
self.target_filename = target_filename
self.tag_input = tag_input
self._quiet = quiet
def Run(self):
"""Iterates through a Plaso Store file, tagging events according to the
tagging input file specified on the command line. It writes the tagging
information to the Plaso Store file."""
pre_obj = event.PreprocessObject()
pre_obj.collection_information = {}
pre_obj.collection_information['file_processed'] = self.target_filename
pre_obj.collection_information['method'] = u'Applying tags.'
pre_obj.collection_information['tag_file'] = self.tag_input
pre_obj.collection_information['tagging_engine'] = u'plasm'
if not self._quiet:
sys.stdout.write(u'Applying tags...\n')
with SetupStorage(self.target_filename, pre_obj) as store:
tags = ParseTaggingFile(self.tag_input)
num_tags = 0
event_tags = []
for event_object in EventObjectGenerator(store, self._quiet):
matched_tags = []
for tag, my_filters in tags.iteritems():
for my_filter in my_filters:
if my_filter.Match(event_object):
matched_tags.append(tag)
# Don't want to evaluate other tags once a tag is discovered.
break
if len(matched_tags) > 0:
event_tag = event.EventTag()
event_tag.store_number = getattr(event_object, 'store_number')
event_tag.store_index = getattr(event_object, 'store_index')
event_tag.comment = u'Tag applied by PLASM tagging engine'
event_tag.tags = matched_tags
event_tags.append(event_tag)
num_tags += 1
store.StoreTagging(event_tags)
if not self._quiet:
sys.stdout.write(u'DONE (applied {} tags)\n'.format(num_tags))
class GroupingEngine(object):
"""Class that defines a grouping engine."""
def _GroupEvents(self, storage_file, tags, quiet=False):
"""Separates each tag list into groups, and writes them to the Plaso Store.
Args:
storage_file: the storage file (instance of StorageFile).
tags: dictionary of the form {tag: [event_object, ...]}.
quiet: suppress the progress output (default: False).
"""
# TODO(ojensen): make this smarter - for now, separates via time interval.
time_interval = 1000000 # 1 second.
groups = []
for tag in tags:
if not quiet:
        sys.stdout.write(u' processing tag "{0:s}"...\n'.format(tag))
locations = tags[tag]
last_time = 0
groups_in_tag = 0
for location in locations:
store_number, store_index = location
# TODO(ojensen): getting higher number event_objects seems to be slow.
event_object = storage_file.GetEventObject(store_number, store_index)
if not hasattr(event_object, 'timestamp'):
continue
timestamp = getattr(event_object, 'timestamp')
if timestamp - last_time > time_interval:
groups_in_tag += 1
groups.append(type('obj', (object,), {
'name': u'{0:s}:{1:d}'.format(tag, groups_in_tag),
'category': tag,
'events': [location]}))
else:
groups[-1].events.append(location)
last_time = timestamp
return groups
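  # Illustrative sketch of the interval rule above (not part of the original
  # module). With the 1 second interval, sorted microsecond timestamps
  # [0, 500, 3000000] split into two groups:
  #
  #   groups = []
  #   last_time = 0
  #   for timestamp in [0, 500, 3000000]:
  #     if not groups or timestamp - last_time > 1000000:
  #       groups.append([timestamp])        # gap too large: start new group
  #     else:
  #       groups[-1].append(timestamp)      # close enough: same group
  #     last_time = timestamp
  #   # groups == [[0, 500], [3000000]]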
# TODO: move this functionality to storage.
  def _ReadTags(self, storage_file):
    """Iterates through an opened Plaso Store, mapping tags to events.
    Args:
      storage_file: the storage file (instance of StorageFile).
    Returns:
      A dictionary mapping each tag to a list of
      (store_number, store_index) tuples.
    """
all_tags = {}
for event_tag in storage_file.GetTagging():
tags = event_tag.tags
location = (event_tag.store_number, event_tag.store_index)
for tag in tags:
if tag in all_tags:
all_tags[tag].append(location)
else:
all_tags[tag] = [location]
return all_tags
def Run(self, storage_file, quiet=False):
"""Iterates through a tagged Plaso Store file, grouping events with the same
tag into groups indicating a single instance of an action. It writes the
grouping information to the Plaso Store file.
Args:
storage_file: the storage file (instance of StorageFile).
quiet: Optional boolean value to indicate the progress output should
be suppressed. The default is False.
"""
if not storage_file.HasTagging():
logging.error(u'Plaso storage file does not contain tagged events')
return
tags = self._ReadTags(storage_file)
groups = self._GroupEvents(storage_file, tags, quiet)
storage_file.StoreGrouping(groups)
class ClusteringEngine(object):
"""Clusters events in a Plaso Store to assist Tag Input creation.
Most methods in this class are staticmethods, to avoid relying excessively on
internal state, and to maintain a clear description of which method acts on
what data.
"""
IGNORE_BASE = frozenset([
'hostname', 'timestamp_desc', 'plugin', 'parser', 'user_sid',
'registry_type', 'computer_name', 'offset', 'allocated', 'file_size',
'record_number'])
def __init__(self, target_filename, threshold, closeness):
"""Constructor for the Clustering Engine.
Args:
target_filename: filename for a Plaso storage file to be clustered.
threshold: support threshold for pruning attributes and event types.
closeness: number of milliseconds to cut off the closeness function.
"""
self.target_filename = target_filename
self.threshold = threshold
self.closeness = closeness
sys.stdout.write("Support threshold: {0:d}\nCloseness: {1:d}ms\n\n".format(
threshold, closeness))
self.ignore = False
self.frequent_words = []
self.vector_size = 20000
@staticmethod
def HashFile(filename, block_size=2**20):
"""Calculates an md5sum of a file from a given filename.
    Returns an MD5 hash as a hexadecimal string, used for naming incremental
    progress files that are written to disk.
Args:
filename: the file to be hashed.
block_size: (optional) block size.
"""
md5 = hashlib.md5()
with open(filename, 'rb') as f:
while True:
data = f.read(block_size)
if not data:
break
md5.update(data)
return md5.hexdigest()
@staticmethod
def StringJoin(first, second):
"""Joins two strings together with a separator.
In spite of being fairly trivial, this is separated out as a function of
its own to ensure it stays consistent, as it happens in multiple places in
the code.
Args:
first: first string.
second: second string.
"""
return u':||:'.join([unicode(first), unicode(second)])
@staticmethod
def PreHash(field_name, attribute):
"""Constructs a string fit to be hashed from an event_object attribute.
Takes both the attribute's name and value, and produces a consistent string
representation. This string can then be hashed to produce a consistent
name/value hash (see hash_attr).
Args:
field_name: an event_object attribute name.
attribute: the corresponding event_object attribute.
"""
    if isinstance(attribute, dict):
      value = repr(sorted(attribute.items()))
    elif isinstance(attribute, sets.Set):
      # sets have no items() method; sort the members directly.
      value = repr(sorted(attribute))
    else:
      value = unicode(attribute)
return ClusteringEngine.StringJoin(field_name, value)
@staticmethod
def HashAttr(field_name, attribute, vector_size):
"""Consistently hashes an event_object attribute/value pair.
Uses pre_hash to generate a consistent string representation of the
attribute, and then hashes and mods it down to fit within the vector_size.
Args:
field_name: an event_object attribute name.
      attribute: the corresponding event_object attribute.
      vector_size: the size of the hash vector, used to mod the hash value.
"""
return hash(ClusteringEngine.PreHash(field_name, attribute)) % vector_size
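  # Illustrative sketch (not part of the original module): PreHash and
  # HashAttr are deterministic, which is what lets separate passes over the
  # store agree on bucket indices, e.g.:
  #
  #   ClusteringEngine.PreHash(u'filename', u'/tmp/a')
  #   # -> u'filename:||:/tmp/a'
  #   ClusteringEngine.HashAttr(u'filename', u'/tmp/a', 20000)
  #   # -> a stable bucket index in range(20000)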
@staticmethod
def EventRepresentation(event_object, ignore, frequent_words=None):
"""Constructs a consistent representation of an event_object.
Returns a dict representing our view of an event_object, stripping out
attributes we ignore. If the frequent_words parameter is set, this strips
out any attribute not listed therein as well. Attribute list order is
undefined, i.e. event_object list attributes are treated as sets instead of
lists.
Args:
event_object: a Plaso event_object.
ignore: a list or set of event_object attributes to ignore.
frequent_words: (optional) whitelist of attributes not to ignore.
"""
if not frequent_words:
frequent_words = []
event_field_names = event_object.GetAttributes().difference(ignore)
representation = {}
for field_name in event_field_names:
attribute = getattr(event_object, field_name)
if hasattr(attribute, '__iter__'):
if isinstance(attribute, dict):
indices = sorted(attribute.keys())
else:
indices = range(len(attribute))
for index in indices:
# quick fix to ignore list order.
index_identifier = index if isinstance(attribute, dict) else ''
subfield_name = ':plasm-sub:'.join(
[field_name, unicode(index_identifier)])
if not frequent_words or ClusteringEngine.StringJoin(
subfield_name, attribute[index]) in frequent_words:
representation[subfield_name] = attribute[index]
else:
if not frequent_words or ClusteringEngine.StringJoin(
field_name, attribute) in frequent_words:
representation[field_name] = attribute
return representation
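  # Illustrative sketch (not part of the original module); the attribute
  # values are hypothetical. For an event whose non-ignored attributes are
  #   filename = u'/tmp/a' and tags = [u'x', u'y']
  # EventRepresentation() returns roughly
  #   {u'tags:plasm-sub:': u'y', u'filename': u'/tmp/a'}
  # i.e. a list attribute is flattened under a ':plasm-sub:' key with an
  # empty index identifier, so list order and duplicates are ignored.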
def EventObjectRepresentationGenerator(self, filename, frequent_words=None):
"""Yields event_representations.
Yields event_representations from a plaso store. Essentially it simply wraps
the EventObjectGenerator and yields event_representations of the resulting
event_objects. If frequent_words is set, the event representation will
exclude any attributes not listed in the frequent_words list.
Args:
filename: a Plaso Store filename.
frequent_words: (optional) whitelist of attributes not to ignore.
"""
with SetupStorage(filename) as store:
for event_object in EventObjectGenerator(store):
if not self.ignore:
self.ignore = event_object.COMPARE_EXCLUDE.union(self.IGNORE_BASE)
yield ClusteringEngine.EventRepresentation(
event_object, self.ignore, frequent_words)
def NoDuplicates(self, dump_filename):
"""Saves a de-duped Plaso Storage.
This goes through the Plaso storage file, and saves a new dump with
duplicates removed. The filename is '.[dump_hash]_dedup', and is returned
at the end of the function. Note that if this function is interrupted,
incomplete results are recorded and this file must be deleted or subsequent
runs will use this incomplete data.
Args:
dump_filename: the filename of the Plaso Storage to be deduped.
"""
sys.stdout.write(u'Removing duplicates...\n')
sys.stdout.flush()
# Whether these incremental files should remain a feature or not is still
# being decided. They're just here for now to make development faster.
nodup_filename = '.{}_dedup'.format(self.plaso_hash)
if os.path.isfile(nodup_filename):
sys.stdout.write(u'Using previously calculated results.\n')
else:
with SetupStorage(dump_filename) as store:
total_events = store.GetNumberOfEvents()
        events_per_dot = max(1, operator.floordiv(total_events, 80))
formatter_cls = output_lib.GetOutputFormatter('Pstorage')
store_dedup = open(nodup_filename, 'wb')
formatter = formatter_cls(store, store_dedup)
with output_lib.EventBuffer(
formatter, check_dedups=True) as output_buffer:
event_object = formatter.FetchEntry()
counter = 0
while event_object:
output_buffer.Append(event_object)
counter += 1
if counter % events_per_dot == 0:
sys.stdout.write(u'.')
sys.stdout.flush()
event_object = formatter.FetchEntry()
sys.stdout.write(u'\n')
return nodup_filename
def ConstructHashVector(self, nodup_filename, vector_size):
"""Constructs the vector which tallies the hashes of attributes.
The purpose of this vector is to save memory. Since many attributes are
fairly unique, we first hash them and keep a count of how many times the
hash appears. Later when constructing our vocabulary, we can ignore any
attributes whose hash points to a value in this vector smaller than the
support threshold value, since we are guaranteed that it appears in the
data at most this tally number of times.
Args:
nodup_filename: the filename of a de-duplicated plaso storage file.
vector_size: size of this vector.
"""
sys.stdout.write(u'Constructing word vector...\n')
sys.stdout.flush()
    vector_filename = '.{0:s}_vector_{1:d}'.format(
        self.plaso_hash, vector_size)
if os.path.isfile(vector_filename):
sys.stdout.write(u'Using previously calculated results.\n')
      with open(vector_filename, 'rb') as file_object:
        vector = pickle.load(file_object)
else:
vector = [0]*vector_size
for representation in self.EventObjectRepresentationGenerator(
nodup_filename):
for field_name, attribute in representation.iteritems():
index = ClusteringEngine.HashAttr(field_name, attribute, vector_size)
vector[index] += 1
      with open(vector_filename, 'wb') as file_object:
        pickle.dump(vector, file_object)
sys.stdout.write(u'\n')
return vector
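  # Illustrative sketch of the tally trick above (not part of the original
  # module). Hashing each word into a fixed-size vector of counters yields
  # an upper bound on the word's true frequency, since collisions can only
  # inflate a bucket; any word whose bucket stays below the support
  # threshold can thus be pruned without ever storing the word itself:
  #
  #   vector = [0] * vector_size
  #   for word in words:
  #     vector[hash(word) % vector_size] += 1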
def FindFrequentWords(self, nodup_filename, threshold, vector=None):
"""Constructs a list of attributes which appear "often".
This goes through a plaso store, and finds all name-attribute pairs which
appear no less than the support threshold value number of times. If
available it uses the hash vector in order to ignore attributes and save
memory.
Args:
nodup_filename: the filename of a de-duplicated plaso storage file.
threshold: the support threshold value.
vector: (optional) vector of hash tallies.
"""
if not vector:
vector = []
sys.stdout.write(u'Constructing 1-dense clusters... \n')
sys.stdout.flush()
frequent_filename = '.{0:s}_freq_{1:s}'.format(
self.plaso_hash, str(threshold))
if os.path.isfile(frequent_filename):
sys.stdout.write(u'Using previously calculated results.\n')
      with open(frequent_filename, 'rb') as file_object:
        frequent_words = pickle.load(file_object)
else:
word_count = {}
vector_size = len(vector)
for representation in self.EventObjectRepresentationGenerator(
nodup_filename):
for field_name, attribute in representation.iteritems():
word = ClusteringEngine.PreHash(field_name, attribute)
          if vector_size:
            # The tally is an upper bound on the true frequency, so a word
            # can only be frequent if its bucket has reached the threshold.
            keep = vector[hash(word) % vector_size] >= threshold
          else:
            keep = True
          if keep:
            if word in word_count:
              word_count[word] += 1
            else:
              word_count[word] = 1
wordlist = [word for word in word_count if word_count[word] >= threshold]
frequent_words = sets.Set(wordlist)
      with open(frequent_filename, 'wb') as file_object:
        pickle.dump(frequent_words, file_object)
sys.stdout.write(u'\n')
return frequent_words
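  # Illustrative sketch (not part of the original module): the vector acts
  # as a first-pass filter in the spirit of a counting Bloom filter; only
  # words whose hashed bucket reached the threshold get an exact count:
  #
  #   exact = {}
  #   for word in words:
  #     if vector[hash(word) % len(vector)] >= threshold:
  #       exact[word] = exact.get(word, 0) + 1
  #   frequent = sets.Set(
  #       word for word, count in exact.iteritems() if count >= threshold)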
def BuildEventTypes(self, nodup_filename, threshold, frequent_words):
"""Builds out the event_types from the frequent attributes.
This uses the frequent words set in order to ignore attributes from plaso
events and thereby create event_types (events which have infrequent
attributes ignored). Currently event types which do not appear at least
    as often as the support threshold dictates are ignored, although whether
this is what we actually want is still under consideration. Returns the
list of event types, as well as a reverse-lookup structure.
Args:
nodup_filename: the filename of a de-duplicated plaso storage file.
threshold: the support threshold value.
frequent_words: the set of attributes not to ignore.
"""
sys.stdout.write(u'Calculating event type candidates...\n')
sys.stdout.flush()
eventtype_filename = ".{0:s}_evtt_{1:s}".format(
self.plaso_hash, str(threshold))
if os.path.isfile(eventtype_filename):
sys.stdout.write(u'Using previously calculated results.\n')
      with open(eventtype_filename, 'rb') as file_object:
        evttypes = pickle.load(file_object)
        evttype_indices = pickle.load(file_object)
else:
evttype_candidates = {}
for representation in self.EventObjectRepresentationGenerator(
nodup_filename, frequent_words=frequent_words):
candidate = repr(representation)
if candidate in evttype_candidates:
evttype_candidates[candidate] += 1
else:
evttype_candidates[candidate] = 1
sys.stdout.write(u'\n')
# clean up memory a little
sys.stdout.write(u'Pruning event type candidates...')
sys.stdout.flush()
evttypes = []
evttype_indices = {}
for candidate, score in evttype_candidates.iteritems():
        # Keep only candidates that meet the support threshold.
        if score >= threshold:
evttype_indices[candidate] = len(evttypes)
evttypes.append(candidate)
del evttype_candidates
# write everything out
      with open(eventtype_filename, 'wb') as file_object:
        pickle.dump(evttypes, file_object)
        pickle.dump(evttype_indices, file_object)
sys.stdout.write(u'\n')
return (evttypes, evttype_indices)
def Run(self):
"""Iterates through a tagged Plaso Store file, attempting to cluster events
into groups that tend to happen together, to help creating Tag Input files.
Future work includes the ability to parse multiple Plaso Store files at
once. By default this will write incremental progress to dotfiles in the
current directory."""
self.plaso_hash = ClusteringEngine.HashFile(self.target_filename)
self.nodup_filename = self.NoDuplicates(self.target_filename)
self.vector = self.ConstructHashVector(
self.nodup_filename, self.vector_size)
self.frequent_words = self.FindFrequentWords(
self.nodup_filename, self.threshold, self.vector)
(self.event_types, self.event_type_indices) = self.BuildEventTypes(
self.nodup_filename, self.threshold, self.frequent_words)
# Next step, clustering the event types
# TODO: implement clustering.
def Main():
"""The main application function."""
front_end = PlasmFrontend()
epilog_tag = ("""
Notes:
When applying tags, a tag input file must be given. Currently,
the format of this file is simply the tag name, followed by
indented lines indicating conditions for the tag, treating any
lines beginning with # as comments. For example, a valid tagging
input file might look like this:
------------------------------
Obvious Malware
# anything with 'malware' in the name or path
filename contains 'malware'
# anything with the malware datatype
datatype is 'windows:malware:this_is_not_a_real_datatype'
File Download
timestamp_desc is 'File Downloaded'
------------------------------
Tag files can be found in the "extra" directory of plaso.
""")
epilog_group = ("""
When applying groups, the Plaso storage file *must* contain tags,
as only tagged events are grouped. Plasm can be run such that it
both applies tags and applies groups, in which case an untagged
Plaso storage file may be used, since tags will be applied before
the grouping is calculated.
""")
epilog_main = ("""
For help with a specific action, use "plasm.py {cluster,group,tag} -h".
""")
description = (
      u'PLASM (Plaso Langar Ad Safna Minna) - Application to tag and group '
u'Plaso storage files.')
arg_parser = argparse.ArgumentParser(
description=textwrap.dedent(description),
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=textwrap.dedent(epilog_main))
arg_parser.add_argument(
'-q', '--quiet', action='store_true', dest='quiet', default=False,
help='Suppress nonessential output.')
subparsers = arg_parser.add_subparsers(dest='subcommand')
cluster_subparser = subparsers.add_parser(
'cluster', formatter_class=argparse.RawDescriptionHelpFormatter)
cluster_subparser.add_argument(
'--closeness', action='store', type=int, metavar='MSEC',
dest='cluster_closeness', default=5000, help=(
          'Number of milliseconds before we stop considering two '
'events to be at all "close" to each other'))
cluster_subparser.add_argument(
'--threshold', action='store', type=int, metavar='NUMBER',
dest='cluster_threshold', default=5,
help='Support threshold for pruning attributes.')
front_end.AddStorageFileOptions(cluster_subparser)
group_subparser = subparsers.add_parser(
'group', formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=textwrap.dedent(epilog_group))
front_end.AddStorageFileOptions(group_subparser)
tag_subparser = subparsers.add_parser(
'tag', formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=textwrap.dedent(epilog_tag))
tag_subparser.add_argument(
'--tagfile', '--tag_file', '--tag-file', action='store', type=unicode,
metavar='FILE', dest='tag_filename', help=(
'Name of the file containing a description of tags and rules '
'for tagging events.'))
front_end.AddStorageFileOptions(tag_subparser)
options = arg_parser.parse_args()
try:
front_end.ParseOptions(options)
except errors.BadConfigOption as exception:
arg_parser.print_help()
print u''
logging.error(u'{0:s}'.format(exception))
return False
if front_end.mode == 'cluster':
front_end.ClusterEvents()
elif front_end.mode == 'group':
front_end.GroupEvents()
elif front_end.mode == 'tag':
front_end.TagEvents()
return True
if __name__ == '__main__':
if not Main():
sys.exit(1)
else:
sys.exit(0)
@@ -0,0 +1,195 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the plasm front-end."""
import os
import shutil
import tempfile
import unittest
from plaso.engine import queue
from plaso.frontend import plasm
from plaso.frontend import test_lib
from plaso.lib import event
from plaso.lib import pfilter
from plaso.lib import storage
from plaso.multi_processing import multi_process
class TestEvent(event.EventObject):
  """Test event object for the plasm front-end tests."""
  DATA_TYPE = 'test:plasm:1'
def __init__(self, timestamp, filename='/dev/null', stuff='bar'):
super(TestEvent, self).__init__()
self.timestamp = timestamp
self.filename = filename
self.timestamp_desc = 'Last Written'
self.parser = 'TestEvent'
self.display_name = 'fake:{}'.format(filename)
self.stuff = stuff
class PlasmTest(test_lib.FrontendTestCase):
"""Tests for the plasm front-end."""
def setUp(self):
"""Sets up the objects used throughout the test."""
self._temp_directory = tempfile.mkdtemp()
self._storage_filename = os.path.join(self._temp_directory, 'plaso.db')
self._tag_input_filename = os.path.join(self._temp_directory, 'input1.tag')
tag_input_file = open(self._tag_input_filename, 'wb')
tag_input_file.write('\n'.join([
'Test Tag',
' filename contains \'/tmp/whoaaaa\'',
' parser is \'TestEvent\' and stuff is \'dude\'']))
tag_input_file.close()
pfilter.TimeRangeCache.ResetTimeConstraints()
# TODO: add upper queue limit.
test_queue = multi_process.MultiProcessingQueue()
test_queue_producer = queue.ItemQueueProducer(test_queue)
test_queue_producer.ProduceItems([
TestEvent(0),
TestEvent(1000),
TestEvent(2000000, '/tmp/whoaaaaa'),
TestEvent(2500000, '/tmp/whoaaaaa'),
TestEvent(5000000, '/tmp/whoaaaaa', 'dude')])
test_queue_producer.SignalEndOfInput()
storage_writer = storage.StorageFileWriter(
test_queue, self._storage_filename)
storage_writer.WriteEventObjects()
self._storage_file = storage.StorageFile(self._storage_filename)
self._storage_file.SetStoreLimit()
def tearDown(self):
"""Cleans up the objects used throughout the test."""
shutil.rmtree(self._temp_directory, True)
def testTagParsing(self):
"""Test if plasm can parse Tagging Input files."""
tags = plasm.ParseTaggingFile(self._tag_input_filename)
self.assertEquals(len(tags), 1)
self.assertTrue('Test Tag' in tags)
self.assertEquals(len(tags['Test Tag']), 2)
def testInvalidTagParsing(self):
"""Test what happens when Tagging Input files contain invalid conditions."""
tag_input_filename = os.path.join(self._temp_directory, 'input2.tag')
tag_input_file = open(tag_input_filename, 'wb')
tag_input_file.write('\n'.join([
'Invalid Tag', ' my hovercraft is full of eels']))
tag_input_file.close()
tags = plasm.ParseTaggingFile(tag_input_filename)
self.assertEquals(len(tags), 1)
self.assertTrue('Invalid Tag' in tags)
self.assertEquals(len(tags['Invalid Tag']), 0)
def testMixedValidityTagParsing(self):
"""Tagging Input file contains a mix of valid and invalid conditions."""
tag_input_filename = os.path.join(self._temp_directory, 'input3.tag')
tag_input_file = open(tag_input_filename, 'wb')
tag_input_file.write('\n'.join([
'Semivalid Tag', ' filename contains \'/tmp/whoaaaa\'',
' Yandelavasa grldenwi stravenka']))
tag_input_file.close()
tags = plasm.ParseTaggingFile(tag_input_filename)
self.assertEquals(len(tags), 1)
self.assertTrue('Semivalid Tag' in tags)
self.assertEquals(len(tags['Semivalid Tag']), 1)
def testIteratingOverPlasoStore(self):
"""Tests the plaso storage iterator"""
counter = 0
for _ in plasm.EventObjectGenerator(self._storage_file, quiet=True):
counter += 1
self.assertEquals(counter, 5)
self._storage_file.Close()
pfilter.TimeRangeCache.ResetTimeConstraints()
self._storage_file = storage.StorageFile(self._storage_filename)
self._storage_file.SetStoreLimit()
counter = 0
for _ in plasm.EventObjectGenerator(self._storage_file, quiet=False):
counter += 1
self.assertEquals(counter, 5)
def testTaggingEngine(self):
"""Tests the Tagging engine's functionality."""
self.assertFalse(self._storage_file.HasTagging())
tagging_engine = plasm.TaggingEngine(
self._storage_filename, self._tag_input_filename, quiet=True)
tagging_engine.Run()
test = storage.StorageFile(self._storage_filename)
self.assertTrue(test.HasTagging())
tagging = test.GetTagging()
count = 0
for tag_event in tagging:
count += 1
self.assertEquals(tag_event.tags, ['Test Tag'])
self.assertEquals(count, 3)
def testGroupingEngineUntagged(self):
"""Grouping engine should do nothing if dealing with untagged storage."""
storage_file = storage.StorageFile(self._storage_filename, read_only=False)
grouping_engine = plasm.GroupingEngine()
grouping_engine.Run(storage_file, quiet=True)
storage_file.Close()
storage_file = storage.StorageFile(self._storage_filename, read_only=True)
self.assertFalse(storage_file.HasGrouping())
storage_file.Close()
def testGroupingEngine(self):
"""Tests the Grouping engine's functionality."""
pfilter.TimeRangeCache.ResetTimeConstraints()
tagging_engine = plasm.TaggingEngine(
self._storage_filename, self._tag_input_filename, quiet=True)
tagging_engine.Run()
storage_file = storage.StorageFile(self._storage_filename, read_only=False)
grouping_engine = plasm.GroupingEngine()
grouping_engine.Run(storage_file, quiet=True)
storage_file.Close()
storage_file = storage.StorageFile(self._storage_filename, read_only=True)
storage_file.SetStoreLimit()
self.assertTrue(storage_file.HasGrouping())
groups = storage_file.GetGrouping()
count = 0
for group_event in groups:
count += 1
self.assertEquals(group_event.category, 'Test Tag')
self.assertEquals(count, 2)
storage_file.Close()
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,364 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test run for a single file and a display of how many events are collected."""
import argparse
import collections
import cProfile
import logging
import os
import pstats
import sys
import time
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.proto import transmission_pb2
from dfvfs.resolver import resolver as path_spec_resolver
from dfvfs.serializer import protobuf_serializer
from google.protobuf import text_format
try:
# Support version 1.X of IPython.
# pylint: disable=no-name-in-module
from IPython.terminal.embed import InteractiveShellEmbed
except ImportError:
# Support version older than 1.X of IPython.
# pylint: disable=no-name-in-module
from IPython.frontend.terminal.embed import InteractiveShellEmbed
import pyevt
import pyevtx
import pylnk
import pymsiecf
import pyregf
import plaso
from plaso.engine import engine
from plaso.engine import queue
from plaso.engine import single_process
from plaso.frontend import psort
from plaso.frontend import utils as frontend_utils
# TODO: Remove this after the dfVFS integration.
# TODO: Make sure we don't need to implement the method _ConsumeItem, or
# to have that not as an abstract method.
# pylint: disable=abstract-method
class PprofEventObjectQueueConsumer(queue.EventObjectQueueConsumer):
"""Class that implements an event object queue consumer for pprof."""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(PprofEventObjectQueueConsumer, self).__init__(queue_object)
self.counter = collections.Counter()
self.parsers = []
self.plugins = []
def _ConsumeEventObject(self, event_object, **unused_kwargs):
"""Consumes an event object callback for ConsumeEventObject."""
parser = getattr(event_object, 'parser', u'N/A')
if parser not in self.parsers:
self.parsers.append(parser)
plugin = getattr(event_object, 'plugin', u'N/A')
if plugin not in self.plugins:
self.plugins.append(plugin)
self.counter[parser] += 1
if plugin != u'N/A':
self.counter[u'[Plugin] {}'.format(plugin)] += 1
self.counter['Total'] += 1
def PrintHeader(options):
"""Print header information, including library versions."""
print frontend_utils.FormatHeader('File Parsed')
print u'{:>20s}'.format(options.file_to_parse)
print frontend_utils.FormatHeader('Versions')
print frontend_utils.FormatOutputString('plaso engine', plaso.GetVersion())
print frontend_utils.FormatOutputString('pyevt', pyevt.get_version())
print frontend_utils.FormatOutputString('pyevtx', pyevtx.get_version())
print frontend_utils.FormatOutputString('pylnk', pylnk.get_version())
print frontend_utils.FormatOutputString('pymsiecf', pymsiecf.get_version())
print frontend_utils.FormatOutputString('pyregf', pyregf.get_version())
if options.filter:
print frontend_utils.FormatHeader('Filter Used')
print frontend_utils.FormatOutputString('Filter String', options.filter)
if options.parsers:
print frontend_utils.FormatHeader('Parser Filter Used')
print frontend_utils.FormatOutputString('Parser String', options.parsers)
def ProcessStorage(options):
"""Process a storage file and produce profile results.
Args:
options: the command line arguments (instance of argparse.Namespace).
Returns:
The profiling statistics or None on error.
"""
storage_parameters = options.storage.split()
storage_parameters.append(options.file_to_parse)
if options.filter:
storage_parameters.append(options.filter)
if options.verbose:
# TODO: why not move this functionality into psort?
profiler = cProfile.Profile()
profiler.enable()
else:
time_start = time.time()
# Call psort and process output.
return_value = psort.Main(storage_parameters)
if options.verbose:
profiler.disable()
else:
time_end = time.time()
if return_value:
print u'Parsed storage file.'
else:
print u'It appears the storage file may not have processed correctly.'
if options.verbose:
return GetStats(profiler)
else:
print frontend_utils.FormatHeader('Time Used')
print u'{:>20f}s'.format(time_end - time_start)
def ProcessFile(options):
"""Process a file and produce profile results."""
if options.proto_file and os.path.isfile(options.proto_file):
with open(options.proto_file) as fh:
proto_string = fh.read()
proto = transmission_pb2.PathSpec()
try:
text_format.Merge(proto_string, proto)
except text_format.ParseError as exception:
logging.error(u'Unable to parse file, error: {}'.format(
exception))
sys.exit(1)
serializer = protobuf_serializer.ProtobufPathSpecSerializer
path_spec = serializer.ReadSerializedObject(proto)
else:
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=options.file_to_parse)
file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
if file_entry is None:
logging.error(u'Unable to open file: {0:s}'.format(options.file_to_parse))
sys.exit(1)
  # Set a few options the engine expects to be there.
# TODO: Can we rather set this directly in argparse?
options.single_process = True
options.debug = False
options.text_prepend = u''
# Set up the engine.
# TODO: refactor and add queue limit.
collection_queue = single_process.SingleProcessQueue()
storage_queue = single_process.SingleProcessQueue()
parse_error_queue = single_process.SingleProcessQueue()
engine_object = engine.BaseEngine(
collection_queue, storage_queue, parse_error_queue)
# Create a worker.
worker_object = engine_object.CreateExtractionWorker('0')
# TODO: add support for parser_filter_string.
worker_object.InitalizeParserObjects()
if options.verbose:
profiler = cProfile.Profile()
profiler.enable()
else:
time_start = time.time()
worker_object.ParseFileEntry(file_entry)
if options.verbose:
profiler.disable()
else:
time_end = time.time()
engine_object.SignalEndOfInputStorageQueue()
event_object_consumer = PprofEventObjectQueueConsumer(storage_queue)
event_object_consumer.ConsumeEventObjects()
if not options.verbose:
print frontend_utils.FormatHeader('Time Used')
print u'{:>20f}s'.format(time_end - time_start)
print frontend_utils.FormatHeader('Parsers Loaded')
# Accessing protected member.
# pylint: disable=protected-access
plugins = []
for parser_object in sorted(worker_object._parser_objects):
print frontend_utils.FormatOutputString('', parser_object.NAME)
parser_plugins = getattr(parser_object, '_plugins', [])
plugins.extend(parser_plugins)
print frontend_utils.FormatHeader('Plugins Loaded')
for plugin in sorted(plugins):
if isinstance(plugin, basestring):
print frontend_utils.FormatOutputString('', plugin)
else:
plugin_string = getattr(plugin, 'NAME', u'N/A')
print frontend_utils.FormatOutputString('', plugin_string)
print frontend_utils.FormatHeader('Parsers Used')
for parser in sorted(event_object_consumer.parsers):
print frontend_utils.FormatOutputString('', parser)
print frontend_utils.FormatHeader('Plugins Used')
for plugin in sorted(event_object_consumer.plugins):
print frontend_utils.FormatOutputString('', plugin)
print frontend_utils.FormatHeader('Counter')
for key, value in event_object_consumer.counter.most_common():
print frontend_utils.FormatOutputString(key, value)
if options.verbose:
return GetStats(profiler)
def GetStats(profiler):
"""Print verbose information from profiler and return a stats object."""
stats = pstats.Stats(profiler, stream=sys.stdout)
print frontend_utils.FormatHeader('Profiler')
print '\n{:-^20}'.format(' Top 10 Time Spent ')
stats.sort_stats('cumulative')
stats.print_stats(10)
print '\n{:-^20}'.format(' Sorted By Function Calls ')
stats.sort_stats('calls')
stats.print_stats()
return stats
def Main():
"""Start the tool."""
usage = (
u'Run this tool against a single file to see how many events are '
u'extracted from it and which parsers recognize it.')
arg_parser = argparse.ArgumentParser(description=usage)
format_str = '[%(levelname)s] %(message)s'
logging.basicConfig(level=logging.INFO, format=format_str)
arg_parser.add_argument(
'-v', '--verbose', dest='verbose', action='store_true', default=False,
help=(
'Be extra verbose in the information printed out (include full '
'stats).'))
arg_parser.add_argument(
'-c', '--console', dest='console', action='store_true',
default=False, help='After processing drop to an interactive shell.')
arg_parser.add_argument(
'-p', '--parsers', dest='parsers', action='store', default='', type=str,
help='A list of parsers to include (see log2timeline documentation).')
arg_parser.add_argument(
'--proto', dest='proto_file', action='store', default='', type=unicode,
metavar='PROTO_FILE', help=(
'A file containing an ASCII PathSpec protobuf describing how to '
'open up the file for parsing.'))
arg_parser.add_argument(
'-s', '--storage', dest='storage', action='store', type=unicode,
metavar='PSORT_PARAMETER', default='', help=(
'Run the profiler against a storage file, with the parameters '
'provided with this option, eg: "-q -w /dev/null". The storage '
'file has to be passed in as the FILE_TO_PARSE argument to the '
          'tool and filters are also optional. This is equivalent to calling '
'psort.py STORAGE_PARAMETER FILE_TO_PARSE [FILTER]. Where the '
'storage parameters are the ones defined with this parameter.'))
# TODO: Add the option of dropping into a python shell that contains the
# stats attribute and others, just print out basic information and do the
# profiling, then drop into a ipython shell that allows you to work with
# the stats object.
arg_parser.add_argument(
'file_to_parse', nargs='?', action='store', metavar='FILE_TO_PARSE',
default=None, help='A path to the file that is to be parsed.')
arg_parser.add_argument(
'filter', action='store', metavar='FILTER', nargs='?', default=None,
      help=('A filter that can be used to filter the dataset before it '
            'is written into storage. More information about the filters '
            'and their usage can be found here: http://plaso.kiddaland.'
            'net/usage/filters'))
options = arg_parser.parse_args()
if not (options.file_to_parse or options.proto_file):
arg_parser.print_help()
print ''
arg_parser.print_usage()
print ''
    logging.error(u'Unable to run without a file to process.')
return False
if options.file_to_parse and not os.path.isfile(options.file_to_parse):
logging.error(u'File [{0:s}] needs to exist.'.format(options.file_to_parse))
return False
PrintHeader(options)
# Stats attribute used for console sessions.
# pylint: disable=unused-variable
if options.storage:
stats = ProcessStorage(options)
else:
stats = ProcessFile(options)
if options.console:
ipshell = InteractiveShellEmbed()
ipshell.confirm_exit = False
ipshell()
return True
if __name__ == '__main__':
if not Main():
sys.exit(1)
else:
sys.exit(0)
File diff suppressed because it is too large.
@@ -0,0 +1,353 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the preg front-end."""
import StringIO
import unittest
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from plaso.frontend import preg
from plaso.frontend import test_lib
from plaso.lib import errors
class StringIOOutputWriter(object):
"""Class that implements a StringIO output writer."""
def __init__(self):
"""Initialize the string output writer."""
super(StringIOOutputWriter, self).__init__()
self._string_obj = StringIO.StringIO()
# Make the output writer compatible with a filehandle interface.
self.write = self.Write
def flush(self):
"""Flush the internal buffer."""
self._string_obj.flush()
def GetValue(self):
"""Returns the write buffer from the output writer."""
return self._string_obj.getvalue()
def GetLine(self):
"""Returns a single line read from the output buffer."""
return self._string_obj.readline()
def SeekToBeginning(self):
"""Seeks the output buffer to the beginning of the buffer."""
self._string_obj.seek(0)
def Write(self, string):
"""Writes a string to the StringIO object."""
self._string_obj.write(string)
class PregFrontendTest(test_lib.FrontendTestCase):
"""Tests for the preg front-end."""
def _GetHelperAndOutputWriter(self):
"""Return a helper object (instance of PregHelper) and an output writer."""
hive_storage = preg.PregStorage()
options = test_lib.Options()
output_writer = StringIOOutputWriter()
test_front_end = preg.PregFrontend(output_writer)
shell_helper = preg.PregHelper(options, test_front_end, hive_storage)
return shell_helper, output_writer
def testBadRun(self):
"""Test few functions that should raise exceptions."""
shell_helper, _ = self._GetHelperAndOutputWriter()
options = test_lib.Options()
options.foo = u'bar'
with self.assertRaises(errors.BadConfigOption):
shell_helper.tool_front_end.ParseOptions(options)
options.regfile = 'this_path_does_not_exist'
with self.assertRaises(errors.BadConfigOption):
shell_helper.tool_front_end.ParseOptions(options)
def testFrontEnd(self):
"""Test various functions inside the front end object."""
shell_helper, _ = self._GetHelperAndOutputWriter()
front_end = shell_helper.tool_front_end
options = test_lib.Options()
hive_path = self._GetTestFilePath([u'NTUSER.DAT'])
options.regfile = hive_path
front_end.ParseOptions(options, source_option='image')
# Test the --info parameter to the tool.
info_string = front_end.GetListOfAllPlugins()
self.assertTrue(u'* Supported Plugins *' in info_string)
self.assertTrue(
u'userassist : Parser for User Assist Registry data' in info_string)
self.assertTrue(
u'services : Parser for services and drivers Registry ' in info_string)
# Get paths to various registry files.
hive_paths_for_userassist = set([
u'/Documents And Settings/.+/NTUSER.DAT', u'/Users/.+/NTUSER.DAT'])
# Testing functions within the front end, thus need to access protected
# members.
# pylint: disable=protected-access
test_paths_for_userassist = set(
front_end._GetRegistryFilePaths(u'userassist'))
self.assertEquals(hive_paths_for_userassist, test_paths_for_userassist)
# Set the path to the system registry.
preg.PregCache.knowledge_base_object.pre_obj.sysregistry = u'C:/Windows/Foo'
# Test the SOFTWARE hive.
test_paths = front_end._GetRegistryFilePaths(u'', u'SOFTWARE')
self.assertEqual(test_paths, [u'C:/Windows/Foo/SOFTWARE'])
def testMagicClass(self):
"""Test the magic class functions."""
# Open up a hive.
hive_path = self._GetTestFilePath([u'NTUSER.DAT'])
shell_helper, _ = self._GetHelperAndOutputWriter()
hive_helper = shell_helper.OpenHive(hive_path, None)
self.assertEqual(hive_helper.name, u'NTUSER.DAT')
preg.PregCache.shell_helper = shell_helper
preg.PregCache.hive_storage = shell_helper.hive_storage
preg.PregCache.parser_context = shell_helper.BuildParserContext()
# Mark this hive as the currently opened one.
preg.PregCache.hive_storage.AppendHive(hive_helper)
storage_length = len(preg.PregCache.hive_storage)
preg.PregCache.hive_storage.SetOpenHive(storage_length - 1)
magic_obj = preg.MyMagics(None)
# Change directory and verify it worked.
registry_key_path = u'\\Software\\JavaSoft\\Java Update\\Policy'
magic_obj.ChangeDirectory(registry_key_path)
registry_key = preg.GetCurrentKey()
self.assertEquals(registry_key.path, registry_key_path)
self.assertEquals(
hive_helper.GetCurrentRegistryKey().path, registry_key_path)
# List the directory content.
output_string = StringIOOutputWriter()
magic_obj.RedirectOutput(output_string)
magic_obj.ListDirectoryContent(u'')
expected_strings = [
u'-r-xr-xr-x [REG_SZ] LastUpdateBeginTime',
u'-r-xr-xr-x [REG_SZ] LastUpdateFinishTime',
u'-r-xr-xr-x [REG_SZ] VersionXmlURL\n']
self.assertEquals(output_string.GetValue(), u'\n'.join(expected_strings))
# Parse the current key.
output_string = StringIOOutputWriter()
magic_obj.RedirectOutput(output_string)
magic_obj.ParseCurrentKey(u'')
partial_string = (
u'LastUpdateFinishTime : [REG_SZ] Tue, 04 Aug 2009 15:18:35 GMT')
self.assertTrue(partial_string in output_string.GetValue())
# Parse using a plugin.
output_string = StringIOOutputWriter()
magic_obj.RedirectOutput(output_string)
magic_obj.ParseWithPlugin(u'userassist')
partial_string = (
u'UEME_RUNPIDL:%csidl2%\\BCWipe 3.0\\BCWipe Task Manager.lnk '
u': [Count: 1]')
self.assertTrue(partial_string in output_string.GetValue())
# Let's see where we are at the moment.
output_string = StringIOOutputWriter()
magic_obj.RedirectOutput(output_string)
magic_obj.PrintCurrentWorkingDirectory(u'')
current_directory = (
u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer'
u'\\UserAssist\\{5E6AB780-7743-11CF-A12B-00AA004AE837}\n')
self.assertEquals(current_directory, output_string.GetValue())
def testParseHive(self):
"""Test the ParseHive function."""
shell_helper, _ = self._GetHelperAndOutputWriter()
# TODO: Replace this once _GetTestFileEntry is pushed in.
system_hive_path = self._GetTestFilePath(['SYSTEM'])
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=system_hive_path)
collectors = [('current', None)]
key_paths = [
u'\\ControlSet001\\Enum\\USBSTOR',
u'\\ControlSet001\\Enum\\USB',
u'\\ControlSet001\\Control\\Windows']
output = shell_helper.tool_front_end.ParseHive(
path_spec, collectors, shell_helper, key_paths=key_paths,
use_plugins=None, verbose=False)
self.assertTrue(u'ComponentizedBuild : [REG_DWORD_LE] 1' in output)
self.assertTrue(u'subkey_name : Disk&Ven_HP&Prod_v100w&Rev_1024' in output)
def testRunPlugin(self):
"""Tests running the preg frontend against a plugin."""
shell_helper, output_writer = self._GetHelperAndOutputWriter()
options = shell_helper.tool_options
options.regfile = self._GetTestFilePath(['NTUSER.DAT'])
options.verbose = False
shell_helper.tool_front_end.ParseOptions(options, source_option='image')
shell_helper.tool_front_end.RunModeRegistryPlugin(options, u'userassist')
self.assertTrue((
u'UEME_RUNPATH:C:\\Program Files\\Internet Explorer\\iexplore.exe : '
u'[Count: 1]') in output_writer.GetValue())
# TODO: Add tests that parse a disk image. Test both Registry key parsing
# and plugin parsing.
def testRunAgainstKey(self):
"""Tests running the preg frontend against a Registry key."""
shell_helper, output_writer = self._GetHelperAndOutputWriter()
options = shell_helper.tool_options
options.key = u'\\Microsoft\\Windows NT\\CurrentVersion'
options.regfile = self._GetTestFilePath(['SOFTWARE'])
options.verbose = False
shell_helper.tool_front_end.ParseOptions(options, source_option='image')
shell_helper.tool_front_end.RunModeRegistryKey(options, u'')
self.assertTrue(
u'Product name : Windows 7 Ultimate' in output_writer.GetValue())
def testRunAgainstFile(self):
"""Tests running the preg frontend against a whole Registry file."""
shell_helper, output_writer = self._GetHelperAndOutputWriter()
options = shell_helper.tool_options
options.regfile = self._GetTestFilePath(['SOFTWARE'])
shell_helper.tool_front_end.ParseOptions(options, source_option='image')
shell_helper.tool_front_end.RunModeRegistryFile(options, options.regfile)
plugins = set()
registry_keys = set()
line_count = 0
output_writer.SeekToBeginning()
line = output_writer.GetLine()
while line:
line_count += 1
line = line.lstrip()
if line.startswith('** Plugin'):
_, _, plugin_name = line.rpartition(':')
plugins.add(plugin_name.strip())
if line.startswith('Key Path :'):
_, _, key_name = line.rpartition(':')
registry_keys.add(key_name.strip())
line = output_writer.GetLine()
# Define the minimum set of plugins that need to be in the output.
expected_plugins = set([
u'winreg_run_software **', u'winreg_task_cache **', u'winreg_winver **',
u'winreg_msie_zone_software **', u'winreg_default **'])
self.assertTrue(expected_plugins.issubset(plugins))
self.assertTrue((
u'\\Microsoft\\Windows NT\\CurrentVersion\\Schedule\\'
u'TaskCache') in registry_keys)
self.assertTrue(
u'\\Microsoft\\Windows\\CurrentVersion\\RunOnce' in registry_keys)
# The output should grow with each newly added plugin, and it might be
# reduced with changes to the codebase, yet there should be at least 1,500
# lines in the output.
self.assertGreater(line_count, 1500)
def testTopLevelMethods(self):
"""Test few of the top level methods in the preg module."""
shell_helper, _ = self._GetHelperAndOutputWriter()
# Set the cache.
preg.PregCache.shell_helper = shell_helper
preg.PregCache.hive_storage = shell_helper.hive_storage
preg.PregCache.parser_context = shell_helper.BuildParserContext()
# Open up a hive.
hive_path = self._GetTestFilePath([u'NTUSER.DAT'])
hive_helper = shell_helper.OpenHive(hive_path, None)
preg.PregCache.hive_storage.AppendHive(hive_helper)
preg.PregCache.hive_storage.SetOpenHive(
len(preg.PregCache.hive_storage) - 1)
self.assertTrue(preg.IsLoaded())
self.assertEqual(
preg.PregCache.hive_storage.loaded_hive.name, u'NTUSER.DAT')
# Open a Registry key using the magic class.
registry_key_path = u'\\Software\\JavaSoft\\Java Update\\Policy'
magic_obj = preg.MyMagics(None)
magic_obj.ChangeDirectory(registry_key_path)
registry_key = preg.GetCurrentKey()
hive_helper = preg.PregCache.hive_storage.loaded_hive
self.assertEquals(registry_key.path, registry_key_path)
self.assertEquals(
hive_helper.GetCurrentRegistryKey().path, registry_key_path)
# Get a value out of the currently loaded Registry key.
value = preg.GetValue(u'VersionXmlURL')
self.assertEquals(value.name, u'VersionXmlURL')
value_data = preg.GetValueData(u'VersionXmlURL')
self.assertEquals(
value_data,
u'http://javadl.sun.com/webapps/download/AutoDL?BundleId=33742')
# Parse a Registry key.
parsed_strings = preg.ParseKey(
registry_key, shell_helper=shell_helper, hive_helper=hive_helper)
self.assertTrue(parsed_strings[1].lstrip().startswith(u'** Plugin : '))
# Change back to the root key.
magic_obj.ChangeDirectory(u'')
registry_key = preg.GetCurrentKey()
self.assertEquals(registry_key.path, u'\\')
# TODO: Add tests for formatting of events, eg: parse a key, get the event
# objects and test the formatting of said event object.
# TODO: Add tests for running in console mode.
if __name__ == '__main__':
unittest.main()
+72
View File
@@ -0,0 +1,72 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper file for filtering out parsers."""
categories = {
'win_gen': [
'bencode', 'esedb', 'filestat', 'google_drive', 'java_idx', 'lnk',
'mcafee_protection', 'olecf', 'openxml', 'prefetch',
'skydrive_log_error', 'skydrive_log', 'skype',
'symantec_scanlog', 'webhist', 'winfirewall', 'winjob',
'winreg'],
'winxp': [
'recycle_bin_info2', 'win_gen', 'winevt'],
'winxp_slow': [
'hachoir', 'winxp'],
'win7': [
'recycle_bin', 'custom_destinations', 'olecf_automatic_destinations',
'win_gen', 'winevtx'],
'win7_slow': [
'hachoir', 'win7'],
'webhist': [
'chrome_cache', 'chrome_cookies', 'chrome_extension_activity',
'chrome_history', 'firefox_cache', 'firefox_cookies',
'firefox_downloads', 'firefox_history', 'java_idx', 'msie_webcache',
'msiecf', 'opera_global', 'opera_typed_history', 'safari_history'],
'linux': [
'bencode', 'filestat', 'google_drive', 'java_idx', 'olecf', 'openxml',
'pls_recall', 'popularity_contest', 'selinux', 'skype', 'syslog',
'utmp', 'webhist', 'xchatlog', 'xchatscrollback', 'zeitgeist'],
'macosx': [
'appusage', 'asl_log', 'bencode', 'bsm_log', 'cups_ipp', 'filestat',
'google_drive', 'java_idx', 'ls_quarantine', 'mac_appfirewall_log',
'mac_document_versions', 'mac_keychain', 'mac_securityd',
'mackeeper_cache', 'macwifi', 'olecf', 'openxml', 'plist', 'skype',
'utmpx', 'webhist'],
# TODO: Once the syslog parser has been rewritten to be faster than the
# current one, move it out of the "slow" mode and into the default parsers
# for Mac OS X.
'macosx_slow': ['macosx', 'syslog'],
'android': [
'android_app_usage', 'android_calls', 'android_sms'],
}
def GetParsersFromCategory(category):
"""Return a list of parsers from a parser category."""
return_list = []
if category not in categories:
return return_list
for item in categories.get(category):
if item in categories:
return_list.extend(GetParsersFromCategory(item))
else:
return_list.append(item)
return return_list
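# Example: categories can reference other categories and are expanded
# recursively, e.g. GetParsersFromCategory('winxp') expands 'win_gen'
# (which in turn expands 'webhist') and returns the flat parser list:
#   ['recycle_bin_info2', 'bencode', 'esedb', ..., 'winreg', 'winevt']
# An unknown category name returns an empty list.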
+498
View File
@@ -0,0 +1,498 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a console, the CLI friendly front-end to plaso."""
import argparse
import logging
import os
import random
import sys
import tempfile
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
try:
# Support version 1.X of IPython.
# pylint: disable=no-name-in-module
from IPython.terminal.embed import InteractiveShellEmbed
except ImportError:
# Support versions older than 1.X of IPython.
# pylint: disable=no-name-in-module
from IPython.frontend.terminal.embed import InteractiveShellEmbed
from IPython.config.loader import Config
# pylint: disable=unused-import
from plaso import analysis
from plaso import filters
from plaso import formatters
from plaso import output
from plaso import parsers
from plaso import preprocessors
from plaso.classifier import scanner
from plaso.engine import collector
from plaso.engine import engine
from plaso.engine import queue
from plaso.engine import single_process
from plaso.engine import utils as engine_utils
from plaso.frontend import frontend
from plaso.frontend import utils as frontend_utils
from plaso.lib import binary
from plaso.lib import bufferlib
from plaso.lib import errors
from plaso.lib import event
from plaso.lib import eventdata
from plaso.lib import filter_interface
from plaso.lib import lexer
from plaso.lib import objectfilter
from plaso.lib import output as output_lib
from plaso.lib import pfilter
from plaso.lib import proxy
from plaso.lib import putils
from plaso.lib import registry as class_registry
from plaso.lib import storage
from plaso.lib import timelib
from plaso.lib import utils
from plaso.multi_processing import foreman
from plaso.multi_processing import rpc_proxy
from plaso.multi_processing import process_info
from plaso.output import helper as output_helper
from plaso.parsers import manager as parsers_manager
from plaso.parsers import plugins
from plaso.parsers import text_parser
from plaso.proto import plaso_storage_pb2
from plaso.serializer import interface as serializer_interface
from plaso.serializer import json_serializer
from plaso.serializer import protobuf_serializer
from plaso.unix import bsmtoken
from plaso.winnt import environ_expand
from plaso.winnt import known_folder_ids
from plaso.winreg import cache as win_registry_cache
from plaso.winreg import interface as win_registry_interface
from plaso.winreg import path_expander
from plaso.winreg import utils as win_registry_utils
from plaso.winreg import winpyregf
from plaso.winreg import winregistry
class PshellFrontend(frontend.ExtractionFrontend):
"""Class that implements the pshell front-end."""
_BYTES_IN_A_MIB = 1024 * 1024
def __init__(self):
"""Initializes the front-end object."""
input_reader = frontend.StdinFrontendInputReader()
output_writer = frontend.StdoutFrontendOutputWriter()
super(PshellFrontend, self).__init__(input_reader, output_writer)
def FindAllOutputs():
"""FindAllOutputs() - All available outputs."""
return putils.FindAllOutputs()
def GetEventData(event_proto, before=0):
"""Prints a hexdump of the event data."""
return frontend_utils.OutputWriter.GetEventDataHexDump(event_proto, before)
def GetFileEntryFromEventObject(event_object):
"""Return a file entry object from a pathspec object.
Args:
event_object: An event object (an instance of EventObject).
Returns:
A file entry object (instance of vfs.file_entry.FileEntry) or
None if the event object doesn't have a defined path spec.
"""
path_spec = getattr(event_object, 'pathspec', None)
if not path_spec:
return
return path_spec_resolver.Resolver.OpenFileEntry(path_spec)
def GetParserNames(parser_filter_string=None):
"""Retrieves the parser names.
Args:
parser_filter_string: Optional parser filter string. The default is None.
Returns:
A list of parser names.
"""
return parsers_manager.ParsersManager.GetParserNames(
parser_filter_string=parser_filter_string)
def GetParserObjects(parser_filter_string=None):
"""Retrieves the parser objects.
Args:
parser_filter_string: Optional parser filter string. The default is None.
Returns:
A list of parser objects (instances of BaseParser).
"""
return parsers_manager.ParsersManager.GetParserObjects(
parser_filter_string=parser_filter_string)
def OpenOSFile(path):
"""Opens a file entry from the OS."""
if not os.path.isfile(path):
logging.error(u'File: {0:s} does not exist.'.format(path))
return
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=path)
return path_spec_resolver.Resolver.OpenFileEntry(path_spec)
def OpenStorageFile(storage_path):
"""Opens a storage file and returns the storage file object."""
if not os.path.isfile(storage_path):
return
try:
store = storage.StorageFile(storage_path, read_only=True)
except IOError:
print 'Unable to load storage file, not a storage file?'
return
return store
def OpenTskFile(image_path, image_offset, path=None, inode=None):
"""Opens a file entry of a file inside an image file."""
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=image_path)
if image_offset > 0:
volume_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TSK_PARTITION, start_offset=image_offset,
parent=path_spec)
else:
volume_path_spec = path_spec
if inode is not None:
if path is None:
path = u''
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TSK, inode=inode, location=path,
parent=volume_path_spec)
else:
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TSK, location=path, parent=volume_path_spec)
return path_spec_resolver.Resolver.OpenFileEntry(path_spec)
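# Example usage (image path and offset are illustrative; the offset is in
# bytes, here a partition starting at sector 63):
#   file_entry = OpenTskFile(
#       u'/cases/image.dd', 63 * 512,
#       path=u'/Windows/System32/config/SOFTWARE')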
def OpenVssFile(path, image_path, store_number, image_offset):
"""Opens a file entry inside a VSS inside an image file."""
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=image_path)
if image_offset > 0:
volume_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TSK_PARTITION, start_offset=image_offset,
parent=path_spec)
else:
volume_path_spec = path_spec
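# The user-facing VSS store number is 1-based while the VSHADOW path spec
# store index is 0-based, hence the adjustment below.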
store_number -= 1
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_VSHADOW, store_index=store_number,
parent=volume_path_spec)
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TSK, location=path, parent=path_spec)
return path_spec_resolver.Resolver.OpenFileEntry(path_spec)
def ParseFile(file_entry):
"""Parse a file given a file entry or path and return a list of results.
Args:
file_entry: Either a file entry object (instance of dfvfs.FileEntry)
or a string containing a path (absolute or relative) to a
local file.
Returns:
A list of event objects (instances of EventObject) that were extracted from
the file (or an empty list if no events were extracted).
"""
if not file_entry:
return []
if isinstance(file_entry, basestring):
file_entry = OpenOSFile(file_entry)
if not file_entry:
return []
# Set up the engine.
# TODO: refactor and add queue limit.
collection_queue = single_process.SingleProcessQueue()
storage_queue = single_process.SingleProcessQueue()
parse_error_queue = single_process.SingleProcessQueue()
engine_object = engine.BaseEngine(
collection_queue, storage_queue, parse_error_queue)
# Create a worker.
worker_object = engine_object.CreateExtractionWorker(0)
# TODO: add support for parser_filter_string.
worker_object.InitalizeParserObjects()
worker_object.ParseFileEntry(file_entry)
collection_queue.SignalEndOfInput()
engine_object.SignalEndOfInputStorageQueue()
results = []
while True:
try:
item = storage_queue.PopItem()
except errors.QueueEmpty:
break
if isinstance(item, queue.QueueEndOfInput):
break
results.append(item)
return results
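# Example usage from within the shell (path is illustrative), using the
# namespace aliases set up in Main():
#   for event_object in parse_file(u'/var/log/syslog'):
#     print timestamp_from_event(event_object)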
def Pfile2File(file_object, path):
"""Saves a file-like object to the path."""
return frontend_utils.OutputWriter.WriteFile(file_object, path)
def PrintTimestamp(timestamp):
"""Prints a human readable timestamp from a timestamp value."""
return frontend_utils.OutputWriter.GetDateTimeString(timestamp)
def PrintTimestampFromEvent(event_object):
"""Prints a human readable timestamp from values stored in an event object."""
return PrintTimestamp(getattr(event_object, 'timestamp', 0))
def Main():
"""Start the tool."""
temp_location = tempfile.gettempdir()
options = putils.Options()
# Set the default options.
options.buffer_size = 0
options.debug = False
options.filename = '.'
options.file_filter = ''
options.filter = ''
options.image = False
options.image_offset = None
options.image_offset_bytes = None
options.old_preprocess = False
options.open_files = False
options.output = os.path.join(temp_location, 'wheredidmytimelinego.dump')
options.output_module = ''
options.parsers = ''
options.parse_vss = False
options.preprocess = False
options.recursive = False
options.single_process = False
options.timezone = 'UTC'
options.workers = 5
format_str = '[%(levelname)s] (%(processName)-10s) %(message)s'
logging.basicConfig(format=format_str)
front_end = PshellFrontend()
try:
front_end.ParseOptions(options, source_option='filename')
front_end.SetStorageFile(options.output)
except errors.BadConfigOption as exception:
logging.error(u'{0:s}'.format(exception))
# TODO: move to frontend object.
if options.image and options.image_offset_bytes is None:
if options.image_offset is not None:
bytes_per_sector = getattr(options, 'bytes_per_sector', 512)
options.image_offset_bytes = options.image_offset * bytes_per_sector
else:
options.image_offset_bytes = 0
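# Worked example: with the default of 512 bytes per sector an image
# offset of 63 sectors translates to 63 * 512 = 32256 bytes.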
namespace = {}
pre_obj = event.PreprocessObject()
namespace.update(globals())
namespace.update({
'frontend': front_end,
'pre_obj': pre_obj,
'options': options,
'find_all_output': FindAllOutputs,
'parse_file': ParseFile,
'timestamp_from_event': PrintTimestampFromEvent,
'message': formatters.manager.EventFormatterManager.GetMessageStrings})
# Include a few random phrases that get thrown in once the user exits the
# shell.
_my_random_phrases = [
u'I haven\'t seen timelines like this since yesterday.',
u'Timelining is super relaxing.',
u'Why did I not use the shell before?',
u'I like a do da cha cha',
u'I AM the Shogun of Harlem!',
(u'It doesn\'t matter if you win or lose, it\'s what you do with your '
u'dancin\' shoes'),
u'I have not had a night like that since the seventies.',
u'Baker Team. They\'re all dead, sir.',
(u'I could have killed \'em all, I could\'ve killed you. In town '
u'you\'re the law, out here it\'s me.'),
(u'Are you telling me that 200 of our men against your boy is a no-win '
u'situation for us?'),
u'Hunting? We ain\'t huntin\' him, he\'s huntin\' us!',
u'You picked the wrong man to push',
u'Live for nothing or die for something',
u'I am the Fred Astaire of karate.',
(u'God gave me a great body and it\'s my duty to take care of my '
u'physical temple.'),
u'This maniac should be wearing a number, not a badge',
u'Imagination is more important than knowledge.',
u'Do you hate being dead?',
u'You\'ve got 5 seconds... and 3 are up.',
u'He is in a gunfight right now. I\'m gonna have to take a message',
u'That would be better than losing your teeth',
u'The less you know, the more you make',
(u'A SQL query goes into a bar, walks up to two tables and asks, '
u'"Can I join you?"'),
u'This is your captor speaking.',
(u'If I find out you\'re lying, I\'ll come back and kill you in your '
u'own kitchen.'),
u'That would be better than losing your teeth',
(u'He\'s the kind of guy who would drink a gallon of gasoline so '
u'that he can p*ss into your campfire.'),
u'I\'m gonna take you to the bank, Senator Trent. To the blood bank!',
u'I missed! I never miss! They must have been smaller than I thought',
u'Nah. I\'m just a cook.',
u'Next thing I know, you\'ll be dating musicians.',
u'Another cold day in hell',
u'Yeah, but I bet you she doesn\'t see these boys in the choir.',
u'You guys think you\'re above the law... well you ain\'t above mine!',
(u'One thought he was invincible... the other thought he could fly... '
u'They were both wrong'),
u'To understand what recursion is, you must first understand recursion']
arg_description = (
u'pshell is the interactive session tool that can be used to'
u'MISSING')
arg_parser = argparse.ArgumentParser(description=arg_description)
arg_parser.add_argument(
'-s', '--storage_file', '--storage-file', dest='storage_file',
type=unicode, default=u'', help=u'Path to a plaso storage file.',
action='store', metavar='PATH')
configuration = arg_parser.parse_args()
if configuration.storage_file:
store = OpenStorageFile(configuration.storage_file)
if store:
namespace.update({'store': store})
functions = [
FindAllOutputs, GetEventData, GetParserNames, GetParserObjects,
OpenOSFile, OpenStorageFile, OpenTskFile, OpenVssFile,
ParseFile, Pfile2File,
PrintTimestamp, PrintTimestampFromEvent]
functions_strings = []
for function in functions:
docstring, _, _ = function.__doc__.partition(u'\n')
docstring = u'\t{0:s} - {1:s}'.format(function.__name__, docstring)
functions_strings.append(docstring)
functions_strings = u'\n'.join(functions_strings)
banner = (
u'--------------------------------------------------------------\n'
u' Welcome to Plaso console - home of the Plaso adventure land.\n'
u'--------------------------------------------------------------\n'
u'This is the place where everything is allowed, as long as it is '
u'written in Python.\n\n'
u'Objects available:\n\toptions - set of options to the frontend.\n'
u'\tfrontend - A copy of the pshell frontend.\n'
u'\n'
u'All libraries have been imported and can be used, see help(frontend) '
u'or help(parser).\n'
u'\n'
u'Base methods:\n'
u'{0:s}'
u'\n\tmessage - Print message strings from an event object.'
u'\n'
u'\n'
u'p.s. typing in "pdb" and pressing enter puts the shell in debug '
u'mode, which causes all exceptions to be sent to pdb.\n'
u'Happy command line console fu-ing.\n\n').format(functions_strings)
exit_message = u'You are now leaving the winter wonderland.\n\n{}'.format(
random.choice(_my_random_phrases))
shell_config = Config()
# Make slight adjustments to the IPython prompt.
shell_config.PromptManager.out_template = (
r'{color.Normal}[{color.Red}\#{color.Normal}]<<< ')
shell_config.PromptManager.in_template = (
r'[{color.LightBlue}\T{color.Normal}] {color.LightPurple}\Y2\n'
r'{color.Normal}[{color.Red}\#{color.Normal}] \$ ')
shell_config.PromptManager.in2_template = r'.\D.>>>'
ipshell = InteractiveShellEmbed(
user_ns=namespace, config=shell_config, banner1=banner,
exit_msg=exit_message)
ipshell.confirm_exit = False
# Set autocall to two, making parentheses unnecessary when calling
# functions (although they can be used and are necessary sometimes,
# like in variable assignments, etc.).
ipshell.autocall = 2
ipshell()
return True
if __name__ == '__main__':
if not Main():
sys.exit(1)
else:
sys.exit(0)
+764
View File
@@ -0,0 +1,764 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Psort (Plaso Síar Og Raðar Þessu) - Makes output from Plaso Storage files.
Sample Usage:
psort.py /tmp/mystorage.dump "date > '01-06-2012'"
See additional details here: http://plaso.kiddaland.net/usage/psort
"""
import argparse
import collections
import datetime
import logging
import multiprocessing
import pdb
import sys
import time
import plaso
from plaso import analysis
from plaso import filters
from plaso import formatters # pylint: disable=unused-import
from plaso import output # pylint: disable=unused-import
from plaso.analysis import context as analysis_context
from plaso.analysis import interface as analysis_interface
from plaso.artifacts import knowledge_base
from plaso.engine import queue
from plaso.frontend import frontend
from plaso.frontend import utils as frontend_utils
from plaso.lib import bufferlib
from plaso.lib import errors
from plaso.lib import output as output_lib
from plaso.lib import pfilter
from plaso.lib import timelib
from plaso.multi_processing import multi_process
from plaso.proto import plaso_storage_pb2
from plaso.serializer import protobuf_serializer
import pytz
class PsortFrontend(frontend.AnalysisFrontend):
"""Class that implements the psort front-end."""
def __init__(self):
"""Initializes the front-end object."""
input_reader = frontend.StdinFrontendInputReader()
output_writer = frontend.StdoutFrontendOutputWriter()
super(PsortFrontend, self).__init__(input_reader, output_writer)
self._analysis_processes = []
self._filter_buffer = None
self._filter_expression = None
self._filter_object = None
self._output_module_class = None
self._output_stream = None
self._slice_size = 5
def AddAnalysisPluginOptions(self, argument_group, plugin_names):
"""Adds the analysis plugin options to the argument group
Args:
argument_group: The argparse argument group (instance of
argparse._ArgumentGroup).
plugin_names: a string containing comma separated analysis plugin names.
Raises:
BadConfigOption: if non-existing analysis plugin names are specified.
"""
if plugin_names == 'list':
return
plugin_list = set([
name.strip().lower() for name in plugin_names.split(',')])
# Get a list of all available plugins.
analysis_plugins = set([
name.lower() for name, _, _ in analysis.ListAllPluginNames()])
# Get a list of the selected plugins (ignoring selections that did not
# have an actual plugin behind it).
plugins_to_load = analysis_plugins.intersection(plugin_list)
# Check to see if we are trying to load plugins that do not exist.
difference = plugin_list.difference(analysis_plugins)
if difference:
raise errors.BadConfigOption(
u'Non-existing analysis plugins specified: {0:s}'.format(
u' '.join(difference)))
plugins = analysis.LoadPlugins(plugins_to_load, None)
for plugin in plugins:
if plugin.ARGUMENTS:
for parameter, config in plugin.ARGUMENTS:
argument_group.add_argument(parameter, **config)
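# Example (plugin names illustrative): passing
# plugin_names = u'tagging,windows_services' registers the argparse
# arguments declared by those two plugins, while the literal string 'list'
# is handled above by returning early so the plugins can be listed instead.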
def AddOutputModuleOptions(self, argument_group, module_names):
"""Adds the output module options to the argument group
Args:
argument_group: The argparse argument group (instance of
argparse._ArgumentGroup).
module_names: a string containing comma separated output module names.
"""
if module_names == 'list':
return
modules_list = set([
name.strip().lower() for name in module_names.split(',')])
for output_module_string, _ in output_lib.ListOutputFormatters():
if not output_module_string.lower() in modules_list:
continue
output_module = output_lib.GetOutputFormatter(output_module_string)
if output_module.ARGUMENTS:
for parameter, config in output_module.ARGUMENTS:
argument_group.add_argument(parameter, **config)
def ListAnalysisPlugins(self):
"""Lists the analysis modules."""
self.PrintHeader('Analysis Modules')
format_length = 10
for name, _, _ in analysis.ListAllPluginNames():
if len(name) > format_length:
format_length = len(name)
for name, description, plugin_type in analysis.ListAllPluginNames():
if plugin_type == analysis_interface.AnalysisPlugin.TYPE_ANNOTATION:
type_string = 'Annotation/tagging plugin'
elif plugin_type == analysis_interface.AnalysisPlugin.TYPE_ANOMALY:
type_string = 'Anomaly plugin'
elif plugin_type == analysis_interface.AnalysisPlugin.TYPE_REPORT:
type_string = 'Summary/Report plugin'
elif plugin_type == analysis_interface.AnalysisPlugin.TYPE_STATISTICS:
type_string = 'Statistics plugin'
else:
type_string = 'Unknown type'
description = u'{0:s} [{1:s}]'.format(description, type_string)
self.PrintColumnValue(name, description, format_length)
self.PrintSeparatorLine()
def ListOutputModules(self):
"""Lists the output modules."""
self.PrintHeader('Output Modules')
for name, description in output_lib.ListOutputFormatters():
self.PrintColumnValue(name, description, 10)
self.PrintSeparatorLine()
def ListTimeZones(self):
"""Lists the timezones."""
self.PrintHeader('Zones')
max_length = 0
for zone in pytz.all_timezones:
if len(zone) > max_length:
max_length = len(zone)
self.PrintColumnValue('Timezone', 'UTC Offset', max_length)
for zone in pytz.all_timezones:
zone_obj = pytz.timezone(zone)
date_str = unicode(zone_obj.localize(datetime.datetime.utcnow()))
if '+' in date_str:
_, _, diff = date_str.rpartition('+')
diff_string = u'+{0:s}'.format(diff)
else:
_, _, diff = date_str.rpartition('-')
diff_string = u'-{0:s}'.format(diff)
self.PrintColumnValue(zone, diff_string, max_length)
self.PrintSeparatorLine()
def ParseOptions(self, options):
"""Parses the options and initializes the front-end.
Args:
options: the command line arguments (instance of argparse.Namespace).
Raises:
BadConfigOption: if the options are invalid.
"""
super(PsortFrontend, self).ParseOptions(options)
output_format = getattr(options, 'output_format', None)
if not output_format:
raise errors.BadConfigOption(u'Missing output format.')
self._output_module_class = output_lib.GetOutputFormatter(output_format)
if not self._output_module_class:
raise errors.BadConfigOption(
u'Invalid output format: {0:s}.'.format(output_format))
self._output_stream = getattr(options, 'write', None)
if not self._output_stream:
self._output_stream = sys.stdout
self._filter_expression = getattr(options, 'filter', None)
if self._filter_expression:
self._filter_object = filters.GetFilter(self._filter_expression)
if not self._filter_object:
raise errors.BadConfigOption(
u'Invalid filter expression: {0:s}'.format(self._filter_expression))
# Check to see if we need to create a circular buffer.
if getattr(options, 'slicer', None):
self._slice_size = getattr(options, 'slice_size', 5)
self._filter_buffer = bufferlib.CircularBuffer(self._slice_size)
def ParseStorage(self, options):
"""Open a storage file and parse through it.
Args:
options: the command line arguments (instance of argparse.Namespace).
Returns:
A counter.
Raises:
RuntimeError: if a non-recoverable situation is encountered.
"""
counter = None
if options.slice:
if options.timezone == 'UTC':
zone = pytz.utc
else:
zone = pytz.timezone(options.timezone)
timestamp = timelib.Timestamp.FromTimeString(options.slice, timezone=zone)
# Convert number of minutes to microseconds.
range_operator = self._slice_size * 60 * 1000000
# Set the time range.
pfilter.TimeRangeCache.SetLowerTimestamp(timestamp - range_operator)
pfilter.TimeRangeCache.SetUpperTimestamp(timestamp + range_operator)
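# Worked example: with the default slice size of 5 minutes the window
# spans 5 * 60 * 1000000 = 300,000,000 microseconds on either side, so
# --slice "2012-06-01 12:00:00" keeps events between 11:55:00 and
# 12:05:00 (in the timezone selected with --zone; the date is
# illustrative).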
if options.analysis_plugins:
read_only = False
else:
read_only = True
try:
storage_file = self.OpenStorageFile(read_only=read_only)
except IOError as exception:
raise RuntimeError(
u'Unable to open storage file: {0:s} with error: {1:s}.'.format(
self._storage_file_path, exception))
with storage_file:
storage_file.SetStoreLimit(self._filter_object)
try:
output_module = self._output_module_class(
storage_file, self._output_stream, options, self._filter_object)
except IOError as exception:
raise RuntimeError(
u'Unable to create output module with error: {0:s}'.format(
exception))
if not output_module:
raise RuntimeError(u'Missing output module.')
if options.analysis_plugins:
logging.info(u'Starting analysis plugins.')
# From all the preprocessing objects, try to get the last one that has
# time zone information stored in it, since it has the highest chance of
# containing the information we are seeking (defaulting to the last
# one).
pre_objs = storage_file.GetStorageInformation()
pre_obj = pre_objs[-1]
for obj in pre_objs:
if getattr(obj, 'time_zone_str', ''):
pre_obj = obj
# Fill in the collection information.
pre_obj.collection_information = {}
encoding = getattr(pre_obj, 'preferred_encoding', None)
if encoding:
cmd_line = ' '.join(sys.argv)
try:
pre_obj.collection_information['cmd_line'] = cmd_line.decode(
encoding)
except UnicodeDecodeError:
pass
pre_obj.collection_information['file_processed'] = (
self._storage_file_path)
pre_obj.collection_information['method'] = 'Running Analysis Plugins'
pre_obj.collection_information['plugins'] = options.analysis_plugins
time_of_run = timelib.Timestamp.GetNow()
pre_obj.collection_information['time_of_run'] = time_of_run
pre_obj.counter = collections.Counter()
# Assign the preprocessing object to the storage.
# This is normally done in the construction of the storage object,
# however we cannot do that here since the preprocessing object is
# stored inside the storage file, so we need to open it first to
# be able to read it in, before we make changes to it. Thus we need
# to access this protected member of the class.
# pylint: disable=protected-access
storage_file._pre_obj = pre_obj
# Start queues and load up plugins.
# TODO: add upper queue limit.
analysis_output_queue = multi_process.MultiProcessingQueue()
event_queue_producers = []
event_queues = []
analysis_plugins_list = [
x.strip() for x in options.analysis_plugins.split(',')]
for _ in xrange(0, len(analysis_plugins_list)):
# TODO: add upper queue limit.
analysis_plugin_queue = multi_process.MultiProcessingQueue()
event_queues.append(analysis_plugin_queue)
event_queue_producers.append(
queue.ItemQueueProducer(event_queues[-1]))
knowledge_base_object = knowledge_base.KnowledgeBase()
analysis_plugins = analysis.LoadPlugins(
analysis_plugins_list, event_queues, options)
# Now we need to start all the plugins.
for analysis_plugin in analysis_plugins:
analysis_report_queue_producer = queue.ItemQueueProducer(
analysis_output_queue)
analysis_context_object = analysis_context.AnalysisContext(
analysis_report_queue_producer, knowledge_base_object)
analysis_process = multiprocessing.Process(
name='Analysis {0:s}'.format(analysis_plugin.plugin_name),
target=analysis_plugin.RunPlugin, args=(analysis_context_object,))
self._analysis_processes.append(analysis_process)
analysis_process.start()
logging.info(
u'Plugin: [{0:s}] started.'.format(analysis_plugin.plugin_name))
else:
event_queue_producers = []
output_buffer = output_lib.EventBuffer(output_module, options.dedup)
with output_buffer:
counter = ProcessOutput(
output_buffer, output_module, self._filter_object,
self._filter_buffer, event_queue_producers)
for information in storage_file.GetStorageInformation():
if hasattr(information, 'counter'):
counter['Stored Events'] += information.counter['total']
if not options.quiet:
logging.info(u'Output processing is done.')
# Get all reports and tags from analysis plugins.
if options.analysis_plugins:
logging.info(u'Processing data from analysis plugins.')
for event_queue_producer in event_queue_producers:
event_queue_producer.SignalEndOfInput()
# Wait for all analysis plugins to complete.
for number, analysis_process in enumerate(self._analysis_processes):
logging.debug(
u'Waiting for analysis plugin: {0:d} to complete.'.format(number))
if analysis_process.is_alive():
analysis_process.join(10)
else:
logging.warning(u'Plugin {0:d} already stopped.'.format(number))
analysis_process.terminate()
logging.debug(u'All analysis plugins are now stopped.')
# Close the output queue.
analysis_output_queue.SignalEndOfInput()
# Go over each output.
analysis_queue_consumer = PsortAnalysisReportQueueConsumer(
analysis_output_queue, storage_file, self._filter_expression,
self.preferred_encoding)
analysis_queue_consumer.ConsumeItems()
if analysis_queue_consumer.tags:
storage_file.StoreTagging(analysis_queue_consumer.tags)
# TODO: analysis_queue_consumer.anomalies:
for item, value in analysis_queue_consumer.counter.iteritems():
counter[item] = value
if self._filter_object and not counter['Limited By']:
counter['Filter By Date'] = (
counter['Stored Events'] - counter['Events Included'] -
counter['Events Filtered Out'])
return counter
# TODO: Function: _ConsumeItem is not defined, inspect if we need to define
# it or change the interface so that it is not an abstract method.
# TODO: Remove this after dfVFS integration.
# pylint: disable=abstract-method
class PsortAnalysisReportQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements an analysis report queue consumer for psort."""
def __init__(
self, queue_object, storage_file, filter_string, preferred_encoding):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
storage_file: the storage file (instance of StorageFile).
filter_string: the filter string.
preferred_encoding: the preferred encoding.
"""
super(PsortAnalysisReportQueueConsumer, self).__init__(queue_object)
self._filter_string = filter_string
self._preferred_encoding = preferred_encoding
self._storage_file = storage_file
self.anomalies = []
self.counter = collections.Counter()
self.tags = []
def _ConsumeItem(self, analysis_report):
"""Consumes an item callback for ConsumeItems.
Args:
analysis_report: the analysis report (instance of AnalysisReport).
"""
self.counter['Total Reports'] += 1
self.counter[u'Report: {0:s}'.format(analysis_report.plugin_name)] += 1
self.anomalies.extend(analysis_report.GetAnomalies())
self.tags.extend(analysis_report.GetTags())
if self._filter_string:
analysis_report.filter_string = self._filter_string
# For now we store the report and print it to stdout.
# TODO: Have the option of saving to a separate file and
# do something more here, for instance saving into a HTML
# file, or something else (including potential images).
self._storage_file.StoreReport(analysis_report)
report_string = analysis_report.GetString()
try:
print report_string.encode(self._preferred_encoding)
except UnicodeError:
logging.error(
u'Unable to print the report due to a Unicode error. '
u'The report is stored inside the storage file and can be '
u'viewed using pinfo (if unable to view please submit a '
u'bug report: https://github.com/log2timeline/plaso/issues).')
def _AppendEvent(event_object, output_buffer, event_queues):
"""Appends an event object to an output buffer and queues.
Args:
event_object: an event object (instance of EventObject).
output_buffer: the output buffer.
event_queues: a list of event queues that serve as input for
the analysis plugins.
"""
output_buffer.Append(event_object)
# Needed due to duplicate removals: if two events are merged then we just
# pick the first inode value.
inode = getattr(event_object, 'inode', None)
if isinstance(inode, basestring):
inode_list = inode.split(';')
try:
new_inode = int(inode_list[0], 10)
except (ValueError, IndexError):
new_inode = 0
event_object.inode = new_inode
for event_queue in event_queues:
event_queue.ProduceItem(event_object)
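# Example: a merged duplicate with inode u'12345;67890' is normalized to
# the integer 12345 by the logic above; unparsable values fall back to 0.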
def ProcessOutput(
output_buffer, output_module, my_filter=None, filter_buffer=None,
analysis_queues=None):
"""Fetch EventObjects from storage and process and filter them.
Args:
output_buffer: output.EventBuffer object.
output_module: The output module (instance of OutputFormatter).
my_filter: A filter object.
filter_buffer: A filter buffer used to store previously discarded
events to store time slice history.
analysis_queues: A list of analysis queues.
"""
counter = collections.Counter()
my_limit = getattr(my_filter, 'limit', 0)
forward_entries = 0
if not analysis_queues:
analysis_queues = []
event_object = output_module.FetchEntry()
while event_object:
if my_filter:
event_match = event_object
if isinstance(event_object, plaso_storage_pb2.EventObject):
# TODO: move serialization to storage, if low-level filtering is needed
# storage should provide functions for it.
serializer = protobuf_serializer.ProtobufEventObjectSerializer
event_match = serializer.ReadSerialized(event_object)
if my_filter.Match(event_match):
counter['Events Included'] += 1
if filter_buffer:
# Indicate we want forward buffering.
forward_entries = 1
# Empty the buffer.
for event_in_buffer in filter_buffer.Flush():
counter['Events Added From Slice'] += 1
counter['Events Included'] += 1
counter['Events Filtered Out'] -= 1
_AppendEvent(event_in_buffer, output_buffer, analysis_queues)
_AppendEvent(event_object, output_buffer, analysis_queues)
if my_limit:
if counter['Events Included'] == my_limit:
break
else:
if filter_buffer and forward_entries:
if forward_entries <= filter_buffer.size:
_AppendEvent(event_object, output_buffer, analysis_queues)
forward_entries += 1
counter['Events Added From Slice'] += 1
counter['Events Included'] += 1
else:
# Reached the max, don't include other entries.
forward_entries = 0
counter['Events Filtered Out'] += 1
elif filter_buffer:
filter_buffer.Append(event_object)
counter['Events Filtered Out'] += 1
else:
counter['Events Filtered Out'] += 1
else:
counter['Events Included'] += 1
_AppendEvent(event_object, output_buffer, analysis_queues)
event_object = output_module.FetchEntry()
if output_buffer.duplicate_counter:
counter['Duplicate Removals'] = output_buffer.duplicate_counter
if my_limit:
counter['Limited By'] = my_limit
return counter
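# Illustration of the --slicer interplay above (buffer size 5; the event
# stream is hypothetical): for events e1..e9 where only e4 matches the
# filter, e1-e3 are buffered, the match on e4 flushes them into the output
# as 'Events Added From Slice', e4 itself is included, and up to 5
# trailing events (e5 onwards) are forwarded before buffering resumes.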
def Main(arguments=None):
"""Start the tool."""
multiprocessing.freeze_support()
front_end = PsortFrontend()
arg_parser = argparse.ArgumentParser(
description=(
u'PSORT - Application to read, filter and process '
u'output from a plaso storage file.'), add_help=False)
tool_group = arg_parser.add_argument_group('Optional Arguments For Psort')
output_group = arg_parser.add_argument_group(
'Optional Arguments For Output Modules')
analysis_group = arg_parser.add_argument_group(
'Optional Arguments For Analysis Modules')
tool_group.add_argument(
'-d', '--debug', action='store_true', dest='debug', default=False,
help='Fall back to debug shell if psort fails.')
tool_group.add_argument(
'-q', '--quiet', action='store_true', dest='quiet', default=False,
help='Don\'t print out counter information after processing.')
tool_group.add_argument(
'-h', '--help', action='help', help='Show this help message and exit.')
tool_group.add_argument(
'-a', '--include_all', action='store_false', dest='dedup', default=True,
help=(
'By default the tool removes duplicate entries from the output. '
'This parameter changes that behavior so all events are included.'))
tool_group.add_argument(
'-o', '--output_format', '--output-format', metavar='FORMAT',
dest='output_format', default='dynamic', help=(
'The output format or "-o list" to see a list of available '
'output formats.'))
tool_group.add_argument(
'--analysis', metavar='PLUGIN_LIST', dest='analysis_plugins',
default='', action='store', type=unicode, help=(
'A comma separated list of analysis plugin names to be loaded '
'or "--analysis list" to see a list of available plugins.'))
tool_group.add_argument(
'-z', '--zone', metavar='TIMEZONE', default='UTC', dest='timezone', help=(
'The timezone of the output or "-z list" to see a list of available '
'timezones.'))
tool_group.add_argument(
'-w', '--write', metavar='OUTPUTFILE', dest='write',
help='Output filename. Defaults to stdout.')
tool_group.add_argument(
'--slice', metavar='DATE', dest='slice', type=str,
default='', action='store', help=(
'Create a time slice around a certain date. This parameter, if '
'defined will display all events that happened X minutes before and '
'after the defined date. X is controlled by the parameter '
'--slice_size but defaults to 5 minutes.'))
tool_group.add_argument(
'--slicer', dest='slicer', action='store_true', default=False, help=(
'Create a time slice around every filter match. This parameter, if '
'defined will save all X events before and after a filter match has '
'been made. X is defined by the --slice_size parameter.'))
tool_group.add_argument(
'--slice_size', dest='slice_size', type=int, default=5, action='store',
help=(
'Defines the slice size. In the case of a regular time slice it '
'defines the number of minutes the slice size should be. In the '
'case of the --slicer it determines the number of events before '
'and after a filter match has been made that will be included in '
'the result set. The default value is 5. See --slice or --slicer '
'for more details about this option.'))
tool_group.add_argument(
'-v', '--version', dest='version', action='version',
version='log2timeline - psort version {0:s}'.format(plaso.GetVersion()),
help='Show the current version of psort.')
front_end.AddStorageFileOptions(tool_group)
tool_group.add_argument(
'filter', nargs='?', action='store', metavar='FILTER', default=None,
type=unicode, help=(
'A filter that can be used to filter the dataset before it '
'is written into storage. More information about the filters '
'and their usage can be found here: http://plaso.kiddaland.'
'net/usage/filters'))
if arguments is None:
arguments = sys.argv[1:]
# Add the output module options.
if '-o' in arguments:
argument_index = arguments.index('-o') + 1
elif '--output_format' in arguments:
argument_index = arguments.index('--output_format') + 1
elif '--output-format' in arguments:
argument_index = arguments.index('--output-format') + 1
else:
argument_index = 0
if argument_index > 0:
module_names = arguments[argument_index]
front_end.AddOutputModuleOptions(output_group, module_names)
# Add the analysis plugin options.
if '--analysis' in arguments:
argument_index = arguments.index('--analysis') + 1
# Get the names of the analysis plugins that should be loaded.
plugin_names = arguments[argument_index]
try:
front_end.AddAnalysisPluginOptions(analysis_group, plugin_names)
except errors.BadConfigOption as exception:
arg_parser.print_help()
print u''
logging.error('{0:s}'.format(exception))
return False
options = arg_parser.parse_args(args=arguments)
format_str = '[%(levelname)s] %(message)s'
if getattr(options, 'debug', False):
logging.basicConfig(level=logging.DEBUG, format=format_str)
else:
logging.basicConfig(level=logging.INFO, format=format_str)
if options.timezone == 'list':
front_end.ListTimeZones()
return True
if options.analysis_plugins == 'list':
front_end.ListAnalysisPlugins()
return True
if options.output_format == 'list':
front_end.ListOutputModules()
return True
try:
front_end.ParseOptions(options)
except errors.BadConfigOption as exception:
arg_parser.print_help()
print u''
logging.error(u'{0:s}'.format(exception))
return False
if front_end.preferred_encoding == 'ascii':
logging.warning(
u'The preferred encoding of your system is ASCII, which is not optimal '
u'for the typically non-ASCII characters that need to be parsed and '
u'processed. The tool will most likely crash and die, perhaps in a way '
u'that may not be recoverable. A five second delay is introduced to '
u'give you time to cancel the runtime and reconfigure your preferred '
u'encoding, otherwise continue at your own risk.')
time.sleep(5)
try:
counter = front_end.ParseStorage(options)
if not options.quiet:
logging.info(frontend_utils.FormatHeader('Counter'))
for element, count in counter.most_common():
logging.info(frontend_utils.FormatOutputString(element, count))
except IOError as exception:
# Piping results to "|head" for instance causes an IOError.
if u'Broken pipe' not in exception:
logging.error(u'Processing stopped early: {0:s}.'.format(exception))
except KeyboardInterrupt:
pass
# Catching every remaining exception in case we are debugging.
except Exception as exception:
if not options.debug:
raise
logging.error(u'{0:s}'.format(exception))
pdb.post_mortem()
return True
if __name__ == '__main__':
if not Main():
sys.exit(1)
else:
sys.exit(0)
+197
View File
@@ -0,0 +1,197 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the psort front-end."""
import os
import StringIO
import unittest
from plaso.formatters import interface as formatters_interface
from plaso.formatters import manager as formatters_manager
from plaso.frontend import psort
from plaso.frontend import test_lib
from plaso.lib import event
from plaso.lib import output
from plaso.lib import pfilter
from plaso.lib import storage
from plaso.lib import timelib_test
class TestEvent1(event.EventObject):
DATA_TYPE = 'test:psort:1'
def __init__(self):
super(TestEvent1, self).__init__()
self.timestamp = 123456
class TestEvent2(event.EventObject):
DATA_TYPE = 'test:psort:2'
def __init__(self, timestamp):
super(TestEvent2, self).__init__()
self.timestamp = timestamp
self.timestamp_desc = 'Last Written'
self.parser = 'TestEvent'
self.display_name = '/dev/none'
self.filename = '/dev/none'
self.some = u'My text dude.'
self.var = {'Issue': False, 'Closed': True}
class TestEvent2Formatter(formatters_interface.EventFormatter):
DATA_TYPE = 'test:psort:2'
FORMAT_STRING = 'My text goes along: {some} lines'
SOURCE_SHORT = 'LOG'
SOURCE_LONG = 'None in Particular'
class TestFormatter(output.LogOutputFormatter):
"""Dummy formatter."""
def FetchEntry(self, store_number=-1, store_index=-1):
return self.store.GetSortedEntry()
def Start(self):
self.filehandle.write((
'date,time,timezone,MACB,source,sourcetype,type,user,host,'
'short,desc,version,filename,inode,notes,format,extra\n'))
def EventBody(self, event_object):
"""Writes the event body.
Args:
event_object: The event object (instance of EventObject).
"""
event_formatter = formatters_manager.EventFormatterManager.GetFormatter(
event_object)
msg, _ = event_formatter.GetMessages(event_object)
source_short, source_long = event_formatter.GetSources(event_object)
self.filehandle.write(u'{0:s}/{1:s} {2:s}\n'.format(
source_short, source_long, msg))
class TestEventBuffer(output.EventBuffer):
"""A test event buffer."""
def __init__(self, store, formatter=None):
self.record_count = 0
self.store = store
if not formatter:
formatter = TestFormatter(store)
super(TestEventBuffer, self).__init__(formatter, False)
def Append(self, event_object):
self._buffer_dict[event_object.EqualityString()] = event_object
self.record_count += 1
def Flush(self):
for event_object_key in self._buffer_dict:
self.formatter.EventBody(self._buffer_dict[event_object_key])
self._buffer_dict = {}
def End(self):
pass
class PsortFrontendTest(test_lib.FrontendTestCase):
"""Tests for the psort front-end."""
def setUp(self):
"""Setup sets parameters that will be reused throughout this test."""
self._front_end = psort.PsortFrontend()
# TODO: have sample output generated from the test.
self._test_file = os.path.join(self._TEST_DATA_PATH, 'psort_test.out')
self.first = timelib_test.CopyStringToTimestamp('2012-07-24 21:45:24')
self.last = timelib_test.CopyStringToTimestamp('2016-11-18 01:15:43')
def testReadEntries(self):
"""Ensure returned EventObjects from the storage are within timebounds."""
timestamp_list = []
pfilter.TimeRangeCache.ResetTimeConstraints()
pfilter.TimeRangeCache.SetUpperTimestamp(self.last)
pfilter.TimeRangeCache.SetLowerTimestamp(self.first)
storage_file = storage.StorageFile(self._test_file, read_only=True)
storage_file.SetStoreLimit()
event_object = storage_file.GetSortedEntry()
while event_object:
timestamp_list.append(event_object.timestamp)
event_object = storage_file.GetSortedEntry()
self.assertEquals(len(timestamp_list), 8)
self.assertTrue(
timestamp_list[0] >= self.first and timestamp_list[-1] <= self.last)
storage_file.Close()
def testOutput(self):
"""Testing if psort can output data."""
events = []
events.append(TestEvent2(5134324321))
events.append(TestEvent2(2134324321))
events.append(TestEvent2(9134324321))
events.append(TestEvent2(15134324321))
events.append(TestEvent2(5134324322))
events.append(TestEvent2(5134024321))
output_fd = StringIO.StringIO()
with test_lib.TempDirectory() as dirname:
temp_file = os.path.join(dirname, 'plaso.db')
storage_file = storage.StorageFile(temp_file, read_only=False)
pfilter.TimeRangeCache.ResetTimeConstraints()
storage_file.SetStoreLimit()
storage_file.AddEventObjects(events)
storage_file.Close()
storage_file = storage.StorageFile(temp_file)
with storage_file:
storage_file.store_range = [1]
formatter = TestFormatter(storage_file, output_fd)
event_buffer = TestEventBuffer(storage_file, formatter)
psort.ProcessOutput(event_buffer, formatter, None)
event_buffer.Flush()
lines = []
for line in output_fd.getvalue().split('\n'):
if line == '.':
continue
if line:
lines.append(line)
# One more line than events (header row).
self.assertEquals(len(lines), 7)
self.assertTrue('My text goes along: My text dude. lines' in lines[2])
self.assertTrue('LOG/' in lines[2])
self.assertTrue('None in Particular' in lines[2])
self.assertEquals(lines[0], (
'date,time,timezone,MACB,source,sourcetype,type,user,host,short,desc,'
'version,filename,inode,notes,format,extra'))
if __name__ == '__main__':
unittest.main()
+68
View File
@@ -0,0 +1,68 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Front-end related functions and classes for testing."""
import os
import shutil
import tempfile
import unittest
class Options(object):
"""A simple configuration object."""
class TempDirectory(object):
"""A self cleaning temporary directory."""
def __init__(self):
"""Initializes the temporary directory."""
super(TempDirectory, self).__init__()
self.name = u''
def __enter__(self):
"""Make this work with the 'with' statement."""
self.name = tempfile.mkdtemp()
return self.name
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Make this work with the 'with' statement."""
shutil.rmtree(self.name, True)
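# Illustrative usage sketch of TempDirectory, as used by the test cases:
# the context manager yields the path of a freshly created directory and
# removes the directory tree again on exit, ignoring removal errors.
#
#   with TempDirectory() as dirname:
#     temp_file = os.path.join(dirname, 'plaso.db')
#     # ... write to temp_file ...
#   # dirname no longer exists here.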
class FrontendTestCase(unittest.TestCase):
"""The unit test case for a front-end."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data')
# Show full diff results; part of TestCase so it does not follow our naming
# conventions.
maxDiff = None
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
The path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
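# Illustrative sketch; 'syslog' is a hypothetical file name under the
# test_data directory:
#
#   test_file = self._GetTestFilePath(['syslog'])
#   # equivalent to: os.path.join(self._TEST_DATA_PATH, 'syslog')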
+212
View File
@@ -0,0 +1,212 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Frontend utility classes and functions."""
import binascii
import os
import tempfile
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.lib import timelib
# TODO: add tests for the functions in this class.
class OutputWriter(object):
"""Class that defines output writing methods for the frontends and tools."""
DATA_BUFFER_SIZE = 32768
@classmethod
def GetDateTimeString(cls, timestamp):
"""Returns a human readable date and time string in the ISO 8601 format."""
return timelib.Timestamp.CopyToIsoFormat(timestamp)
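# Illustrative sketch; the exact timezone suffix depends on timelib, but
# the output is along the lines of:
#
#   OutputWriter.GetDateTimeString(0)
#   # -> '1970-01-01T00:00:00+00:00'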
@classmethod
def GetEventDataHexDump(cls, event_object, before=0, length=20):
"""Returns a hexadecimal representation of the event data.
This function creates a hexadecimal string representation based on
the event data described by the event object.
Args:
event_object: The event object (instance of EventObject).
before: Optional number of bytes to include in the output before
the event data. The default is 0.
length: Optional number of 16-byte lines to include in the output.
The default is 20.
Returns:
A string that contains the hexadecimal representation of the event data.
"""
if not event_object:
return u'Missing event object.'
if not hasattr(event_object, 'pathspec'):
return u'Event object has no path specification.'
try:
file_entry = path_spec_resolver.Resolver.OpenFileEntry(
event_object.pathspec)
except IOError as exception:
return u'Unable to open file with error: {0:s}'.format(exception)
offset = getattr(event_object, 'offset', 0)
if offset - before > 0:
offset -= before
file_object = file_entry.GetFileObject()
file_object.seek(offset, os.SEEK_SET)
data = file_object.read(int(length) * 16)
file_object.close()
return cls.GetHexDump(data, offset)
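# Illustrative sketch, assuming event_object is an EventObject whose
# pathspec attribute can be resolved by dfVFS:
#
#   print OutputWriter.GetEventDataHexDump(event_object, before=16, length=4)
#   # prints 4 lines of 16 bytes, starting 16 bytes before the event offset.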
@classmethod
def GetHexDump(cls, data, offset=0):
"""Returns a hexadecimal representation of the contents of a binary string.
All printable ASCII bytes in the data are rendered as characters in a
column next to their hexadecimal representation, as in 'xxd'.
Args:
data: The binary string.
offset: An optional start point in bytes where the data lies, for
presentation purposes.
Returns:
A string that contains the hexadecimal representation of the binary
string.
"""
hexdata = binascii.hexlify(data)
output_strings = []
# Note that the // operator explicitly performs an integer (floor)
# division.
hexdata_length = len(hexdata)
lines_of_hexdata = hexdata_length // 32
line_number = 0
point = 0
while line_number < lines_of_hexdata:
line_of_hexdata = hexdata[point:point + 32]
output_strings.append(
cls.GetHexDumpLine(line_of_hexdata, offset, line_number))
hexdata_length -= 32
line_number += 1
point += 32
if hexdata_length > 0:
line_of_hexdata = '{0:s}{1:s}'.format(
hexdata[point:], ' ' * (32 - hexdata_length))
output_strings.append(
cls.GetHexDumpLine(line_of_hexdata, offset, line_number))
return '\n'.join(output_strings)
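# Illustrative sketch of the resulting format (spacing approximate):
#
#   OutputWriter.GetHexDump('plaso')
#   # -> 0000000: 706c 6173 6f                        plaso...........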
@classmethod
def GetHexDumpLine(cls, line, orig_ofs, entry_nr=0):
"""Returns a single line of 'xxd'-like hexadecimal representation."""
output_strings = []
output_strings.append('{0:07x}: '.format(orig_ofs + entry_nr * 16))
for bit in range(0, 8):
output_strings.append('{0:s} '.format(line[bit * 4:bit * 4 + 4]))
for bit in range(0, 16):
try:
data = binascii.unhexlify(line[bit * 2: bit * 2 + 2])
except TypeError:
data = '.'
if ord(data) > 31 and ord(data) < 128:
output_strings.append(data)
else:
output_strings.append('.')
return ''.join(output_strings)
@classmethod
def WriteFile(cls, input_file_object, output_path=None):
"""Writes the data of a file-like object to a "regular" file.
Args:
input_file_object: the input file-like object.
output_path: optional path of the output file. The default is None, which
will write the data to a temporary file.
Returns:
The path of the output file.
"""
if output_path:
output_file_object = open(output_path, 'wb')
else:
# Use delete=False so the temporary file is not removed when it is
# closed below; the caller receives the path and owns the file.
output_file_object = tempfile.NamedTemporaryFile(delete=False)
output_path = output_file_object.name
input_file_object.seek(0, os.SEEK_SET)
data = input_file_object.read(cls.DATA_BUFFER_SIZE)
while data:
output_file_object.write(data)
data = input_file_object.read(cls.DATA_BUFFER_SIZE)
output_file_object.close()
return output_path
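# Illustrative sketch, assuming file_entry is a dfVFS file entry (compare
# GetEventDataHexDump above):
#
#   file_object = file_entry.GetFileObject()
#   output_path = OutputWriter.WriteFile(file_object)
#   # output_path now contains a copy of the file data.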
def FormatHeader(header, char='*'):
"""Formats the header as a line of 80 chars with the header text centered."""
format_string = '\n{{0:{0:s}^80}}'.format(char)
return format_string.format(u' {0:s} '.format(header))
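# Illustrative sketch; the header text is centered in an 80 character line
# of padding characters, preceded by a newline:
#
#   FormatHeader('Sources')
#   # -> '\n****...**** Sources ****...****'  (80 characters wide)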
def FormatOutputString(name, description, col_length=25):
"""Return a formatted string ready for output."""
max_width = 80
line_length = max_width - col_length - 3
# The first format string right-aligns the name in a column of col_length
# characters followed by ' : ' and the description; the second one indents
# wrapped description lines so they start in the same column.
fmt = u'{{:>{0:d}s}} : {{}}'.format(col_length)
fmt_second = u'{{:<{0:d}}}{{}}'.format(col_length + 3)
description = unicode(description)
if len(description) < line_length:
return fmt.format(name, description)
# Split each word up in the description.
words = description.split()
current = 0
lines = []
word_buffer = []
for word in words:
current += len(word) + 1
if current >= line_length:
current = len(word)
lines.append(u' '.join(word_buffer))
word_buffer = [word]
else:
word_buffer.append(word)
lines.append(u' '.join(word_buffer))
ret = []
ret.append(fmt.format(name, lines[0]))
for line in lines[1:]:
ret.append(fmt_second.format('', line))
return u'\n'.join(ret)
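# Illustrative sketch: a short description is emitted on a single line,
# with the name right-aligned in a 25 character column; longer descriptions
# wrap onto continuation lines indented to the same column.
#
#   FormatOutputString('source', 'The source file.')
#   # -> u'                   source : The source file.'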