Import from old repository

Stefan
2020-04-06 18:48:34 +02:00
commit 0da6783a45
762 changed files with 103065 additions and 0 deletions
@@ -0,0 +1,30 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '1.2.0'
VERSION_DEV = False
VERSION_DATE = '20141220'
def GetVersion():
"""Returns version information for plaso."""
if not VERSION_DEV:
return __version__
return u'{0:s}_{1:s}'.format(__version__, VERSION_DATE)
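As a quick illustration, here is a minimal sketch (Python 2, matching the codebase) of the two forms GetVersion() can return, assuming the plaso package above is importable:

from plaso import GetVersion

# With VERSION_DEV = False (as above) the plain version string is returned.
print(GetVersion())  # 1.2.0

# If VERSION_DEV were True, the date would be appended:
# u'{0:s}_{1:s}'.format('1.2.0', '20141220') -> u'1.2.0_20141220'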
@@ -0,0 +1,83 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Import statements for analysis plugins and common methods."""
from plaso.analysis import interface
from plaso.lib import errors
# Import statements of analysis plugins.
from plaso.analysis import browser_search
from plaso.analysis import chrome_extension
from plaso.analysis import windows_services
# TODO: move these functions to a manager class. And add a test for this
# function.
def ListAllPluginNames(show_all=True):
"""Return a list of all available plugin names and it's doc string."""
results = []
for cls_obj in interface.AnalysisPlugin.classes.itervalues():
doc_string, _, _ = cls_obj.__doc__.partition('\n')
obj = cls_obj(None)
if not show_all and cls_obj.ENABLE_IN_EXTRACTION:
results.append((obj.plugin_name, doc_string, obj.plugin_type))
elif show_all:
results.append((obj.plugin_name, doc_string, obj.plugin_type))
return sorted(results)
def LoadPlugins(plugin_names, incoming_queues, options=None):
"""Yield analysis plugins for a given list of plugin names.
Given a list of plugin names this method finds the analysis
plugins, initializes them and returns a generator.
Args:
plugin_names: A list of plugin names that should be loaded up. This
should be a list of strings.
incoming_queues: A list of queues (QueueInterface object) that the plugin
uses to read in incoming events to analyse.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
Yields:
Analysis plugin objects (instances of AnalysisPlugin).
Raises:
errors.BadConfigOption: If plugin_names does not contain a list of
strings.
"""
try:
plugin_names_lower = [word.lower() for word in plugin_names]
except AttributeError:
raise errors.BadConfigOption(u'Plugin names should be a list of strings.')
for plugin_object in interface.AnalysisPlugin.classes.itervalues():
plugin_name = plugin_object.NAME.lower()
if plugin_name in plugin_names_lower:
queue_index = plugin_names_lower.index(plugin_name)
try:
incoming_queue = incoming_queues[queue_index]
except (TypeError, IndexError):
incoming_queue = None
yield plugin_object(incoming_queue, options)
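A minimal sketch of driving LoadPlugins() with one queue per plugin name; SingleProcessQueue comes from plaso.engine.single_process as used by the tests later in this commit, and the plugin names are assumed to be registered:

from plaso import analysis
from plaso.engine import single_process

plugin_names = [u'browser_search', u'windows_services']
queues = [single_process.SingleProcessQueue() for _ in plugin_names]

# LoadPlugins() is a generator; each yielded object is an initialized
# AnalysisPlugin wired to the queue at the matching index.
for plugin in analysis.LoadPlugins(plugin_names, queues):
  print(plugin.plugin_name)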
@@ -0,0 +1,257 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A plugin that extracts browser history from events."""
import collections
import logging
import urllib
from plaso import filters
from plaso.analysis import interface
from plaso.formatters import manager as formatters_manager
from plaso.lib import event
# Create a lightweight object that is used to store timeline based information
# about each search term.
SEARCH_OBJECT = collections.namedtuple(
'SEARCH_OBJECT', 'time source engine search_term')
def ScrubLine(line):
"""Scrub the line of most obvious HTML codes.
An attempt at taking a line and swapping all instances
of %XX, which represent a character in hex, with its
unicode character.
Args:
line: The string that we are about to "fix".
Returns:
String that has its %XX hex codes swapped for text.
"""
if not line:
return ''
try:
return unicode(urllib.unquote(str(line)), 'utf-8')
except UnicodeDecodeError:
logging.warning(u'Unable to decode line: {0:s}'.format(line))
return line
class FilterClass(object):
"""A class that contains all the parser functions."""
@classmethod
def _GetBetweenQEqualsAndAmbersand(cls, string):
"""Return back string that is defined 'q=' and '&'."""
if 'q=' not in string:
return string
_, _, line = string.partition('q=')
before_and, _, _ = line.partition('&')
if not before_and:
return line
return before_and.split()[0]
@classmethod
def _SearchAndQInLine(cls, string):
"""Return a bool indicating if the words q= and search appear in string."""
return 'search' in string and 'q=' in string
@classmethod
def GoogleSearch(cls, url):
"""Return back the extracted string."""
if not cls._SearchAndQInLine(url):
return
line = cls._GetBetweenQEqualsAndAmbersand(url)
if not line:
return
return line.replace('+', ' ')
@classmethod
def YouTube(cls, url):
"""Return back the extracted string."""
return cls.GenericSearch(url)
@classmethod
def BingSearch(cls, url):
"""Return back the extracted string."""
return cls.GenericSearch(url)
@classmethod
def GenericSearch(cls, url):
"""Return back the extracted string from a generic search engine."""
if not cls._SearchAndQInLine(url):
return
return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')
@classmethod
def Yandex(cls, url):
"""Return back the results from Yandex search engine."""
if 'text=' not in url:
return
_, _, line = url.partition('text=')
before_and, _, _ = line.partition('&')
if not before_and:
return
yandex_search_url = before_and.split()[0]
return yandex_search_url.replace('+', ' ')
@classmethod
def DuckDuckGo(cls, url):
"""Return back the extracted string."""
if not 'q=' in url:
return
return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')
@classmethod
def Gmail(cls, url):
"""Return back the extracted string."""
if 'search/' not in url:
return
_, _, line = url.partition('search/')
first, _, _ = line.partition('/')
second, _, _ = first.partition('?compose')
return second.replace('+', ' ')
class AnalyzeBrowserSearchPlugin(interface.AnalysisPlugin):
"""Analyze browser search entries from events."""
NAME = 'browser_search'
# Indicate that we do not want to run this plugin during regular extraction.
ENABLE_IN_EXTRACTION = False
# Here we define filters and callback methods for all hits on each filter.
FILTERS = (
(('url iregexp "(www.|encrypted.|/)google." and url contains "search"'),
'GoogleSearch'),
('url contains "youtube.com"', 'YouTube'),
(('source is "WEBHIST" and url contains "bing.com" and url contains '
'"search"'), 'BingSearch'),
('url contains "mail.google.com"', 'Gmail'),
(('source is "WEBHIST" and url contains "yandex.com" and url contains '
'"yandsearch"'), 'Yandex'),
('url contains "duckduckgo.com"', 'DuckDuckGo')
)
# We need to implement the interface for analysis plugins, but we don't use
# command line options here, so disable checking for unused args.
# pylint: disable=unused-argument
def __init__(self, incoming_queue, options=None):
"""Initializes the browser search analysis plugin.
Args:
incoming_queue: A queue that is used to listen to incoming events.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
"""
super(AnalyzeBrowserSearchPlugin, self).__init__(incoming_queue)
self._filter_dict = {}
self._counter = collections.Counter()
# Store a list of search terms in a timeline format.
# The format is key = timestamp, value = (source, engine, search term).
self._search_term_timeline = []
for filter_str, call_back in self.FILTERS:
filter_obj = filters.GetFilter(filter_str)
call_back_obj = getattr(FilterClass, call_back, None)
if filter_obj and call_back_obj:
self._filter_dict[filter_obj] = (call_back, call_back_obj)
# pylint: enable=unused-argument
def CompileReport(self):
"""Compiles a report of the analysis.
Returns:
The analysis report (instance of AnalysisReport).
"""
report = event.AnalysisReport()
results = {}
for key, count in self._counter.iteritems():
search_engine, _, search_term = key.partition(':')
results.setdefault(search_engine, {})
results[search_engine][search_term] = count
report.report_dict = results
report.report_array = self._search_term_timeline
lines_of_text = []
for search_engine, terms in sorted(results.items()):
lines_of_text.append(u' == ENGINE: {0:s} =='.format(search_engine))
for search_term, count in sorted(
terms.iteritems(), key=lambda x: (x[1], x[0]), reverse=True):
lines_of_text.append(u'{0:d} {1:s}'.format(count, search_term))
# An empty string is added to have SetText create an empty line.
lines_of_text.append(u'')
report.SetText(lines_of_text)
return report
def ExamineEvent(
self, unused_analysis_context, event_object, **unused_kwargs):
"""Analyzes an event object.
Args:
analysis_context: An analysis context object
(instance of AnalysisContext).
event_object: An event object (instance of EventObject).
"""
# This event requires a URL attribute.
url_attribute = getattr(event_object, 'url', None)
if not url_attribute:
return
# TODO: refactor this the source should be used in formatting only.
# Check if we are dealing with a web history event.
source, _ = formatters_manager.EventFormatterManager.GetSourceStrings(
event_object)
if source != 'WEBHIST':
return
for filter_obj, call_backs in self._filter_dict.items():
call_back_name, call_back_object = call_backs
if filter_obj.Match(event_object):
returned_line = ScrubLine(call_back_object(url_attribute))
if not returned_line:
continue
self._counter[u'{0:s}:{1:s}'.format(call_back_name, returned_line)] += 1
# Add the timeline format for each search term.
self._search_term_timeline.append(SEARCH_OBJECT(
getattr(event_object, 'timestamp', 0),
getattr(event_object, 'plugin', getattr(
event_object, 'parser', u'N/A')),
call_back_name, returned_line))
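For intuition, a small sketch of the URL helpers above in isolation (Python 2); the example URL is invented for illustration:

from plaso.analysis.browser_search import FilterClass, ScrubLine

url = u'https://www.google.com/search?q=funny+cats&aq=f'

# GoogleSearch() extracts the term between 'q=' and '&' and unescapes '+'.
print(FilterClass.GoogleSearch(url))  # funny cats

# ScrubLine() swaps %XX escapes for their characters.
print(ScrubLine(u'funny%20cats'))  # funny cats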
@@ -0,0 +1,74 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the browser search analysis plugin."""
import unittest
from plaso.analysis import browser_search
from plaso.analysis import test_lib
# pylint: disable=unused-import
from plaso.formatters import chrome as chrome_formatter
from plaso.lib import event
from plaso.parsers import sqlite
from plaso.parsers.sqlite_plugins import chrome
class BrowserSearchAnalysisTest(test_lib.AnalysisPluginTestCase):
"""Tests for the browser search analysis plugin."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self._parser = sqlite.SQLiteParser()
def testAnalyzeFile(self):
"""Read a storage file that contains URL data and analyze it."""
knowledge_base = self._SetUpKnowledgeBase()
test_file = self._GetTestFilePath(['History'])
event_queue = self._ParseFile(self._parser, test_file, knowledge_base)
analysis_plugin = browser_search.AnalyzeBrowserSearchPlugin(event_queue)
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
self.assertEquals(len(analysis_reports), 1)
analysis_report = analysis_reports[0]
# Due to the behavior of the join one additional empty string at the end
# is needed to create the last empty line.
expected_text = u'\n'.join([
u' == ENGINE: GoogleSearch ==',
u'1 really really funny cats',
u'1 java plugin',
u'1 funnycats.exe',
u'1 funny cats',
u'',
u''])
self.assertEquals(analysis_report.text, expected_text)
self.assertEquals(analysis_report.plugin_name, 'browser_search')
expected_keys = set([u'GoogleSearch'])
self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys)
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,201 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A plugin that gather extension ID's from Chrome history browser."""
import logging
import re
import urllib2
from plaso.analysis import interface
from plaso.lib import event
class AnalyzeChromeExtensionPlugin(interface.AnalysisPlugin):
"""Convert Chrome extension ID's into names, requires Internet connection."""
NAME = 'chrome_extension'
# Indicate that we can run this plugin during regular extraction.
ENABLE_IN_EXTRACTION = True
_TITLE_RE = re.compile('<title>([^<]+)</title>')
_WEB_STORE_URL = u'https://chrome.google.com/webstore/detail/{xid}?hl=en-US'
# We need to implement the interface for analysis plugins, but we don't use
# command line options here, so disable checking for unused args.
# pylint: disable=unused-argument
def __init__(self, incoming_queue, options=None):
"""Initializes the Chrome extension analysis plugin.
Args:
incoming_queue: A queue that is used to listen to incoming events.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
"""
super(AnalyzeChromeExtensionPlugin, self).__init__(incoming_queue)
self._results = {}
self.plugin_type = self.TYPE_REPORT
# TODO: see if these can be moved to arguments passed to ExamineEvent
# or some kind of state object.
self._sep = None
self._user_paths = None
# Saved list of already looked up extensions.
self._extensions = {}
# pylint: enable=unused-argument
def _GetChromeWebStorePage(self, extension_id):
"""Retrieves the page for the extension from the Chrome store website.
Args:
extension_id: string containing the extension identifier.
Returns:
The HTTP response object returned by urllib2.urlopen, or None on error.
"""
web_store_url = self._WEB_STORE_URL.format(xid=extension_id)
try:
response = urllib2.urlopen(web_store_url)
except urllib2.HTTPError as exception:
logging.warning((
u'[{0:s}] unable to retrieve URL: {1:s} with error: {2:s}').format(
self.NAME, web_store_url, exception))
return
except urllib2.URLError as exception:
logging.warning((
u'[{0:s}] invalid URL: {1:s} with error: {2:s}').format(
self.NAME, web_store_url, exception))
return
return response
def _GetTitleFromChromeWebStore(self, extension_id):
"""Retrieves the name of the extension from the Chrome store website.
Args:
extension_id: string containing the extension identifier.
Returns:
The name of the extension or None if it could not be determined.
"""
# Check if we have already looked this extension up.
if extension_id in self._extensions:
return self._extensions.get(extension_id)
response = self._GetChromeWebStorePage(extension_id)
if not response:
logging.warning(
u'[{0:s}] no data returned for extension identifier: {1:s}'.format(
self.NAME, extension_id))
return
first_line = response.readline()
match = self._TITLE_RE.search(first_line)
if match:
title = match.group(1)
# Fall back to the full title so that name is always defined.
name = title
if title.startswith(u'Chrome Web Store - '):
name = title[19:]
elif title.endswith(u'- Chrome Web Store'):
name = title[:-19]
self._extensions[extension_id] = name
return name
self._extensions[extension_id] = u'Not Found'
def CompileReport(self):
"""Compiles a report of the analysis.
Returns:
The analysis report (instance of AnalysisReport).
"""
report = event.AnalysisReport()
report.report_dict = self._results
lines_of_text = []
for user, extensions in sorted(self._results.iteritems()):
lines_of_text.append(u' == USER: {0:s} =='.format(user))
for extension, extension_id in sorted(extensions):
lines_of_text.append(u' {0:s} [{1:s}]'.format(extension, extension_id))
# An empty string is added to have SetText create an empty line.
lines_of_text.append(u'')
report.SetText(lines_of_text)
return report
def ExamineEvent(self, analysis_context, event_object, **unused_kwargs):
"""Analyzes an event object.
Args:
analysis_context: An analysis context object
(instance of AnalysisContext).
event_object: An event object (instance of EventObject).
"""
# Only interested in filesystem events.
if event_object.data_type != 'fs:stat':
return
filename = getattr(event_object, 'filename', None)
if not filename:
return
# Determine if we have a Chrome extension ID.
if u'chrome' not in filename.lower():
return
if not self._sep:
self._sep = analysis_context.GetPathSegmentSeparator(filename)
if not self._user_paths:
self._user_paths = analysis_context.GetUserPaths(analysis_context.users)
if u'{0:s}Extensions{0:s}'.format(self._sep) not in filename:
return
# Now we have extension IDs, let's check if we've got the
# folder, nothing else.
paths = filename.split(self._sep)
if paths[-2] != u'Extensions':
return
extension_id = paths[-1]
if extension_id == u'Temp':
return
# Get the user and ID.
user = analysis_context.GetUsernameFromPath(
self._user_paths, filename, self._sep)
# We still want this information in here, so that we can
# manually deduce the username.
if not user:
if len(filename) > 25:
user = u'Not found ({0:s}...)'.format(filename[0:25])
else:
user = u'Not found ({0:s})'.format(filename)
extension = self._GetTitleFromChromeWebStore(extension_id)
if not extension:
extension = extension_id
self._results.setdefault(user, [])
extension_string = extension.decode('utf-8', 'ignore')
if (extension_string, extension_id) not in self._results[user]:
self._results[user].append((extension_string, extension_id))
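To make the title handling above concrete, a sketch of how a web store <title> maps to an extension name; the page line is invented for illustration:

import re

TITLE_RE = re.compile('<title>([^<]+)</title>')  # same pattern as _TITLE_RE

line = '<title>Chrome Web Store - Google Drive</title>'
title = TITLE_RE.search(line).group(1)

# len('Chrome Web Store - ') == 19, hence the title[19:] slice above.
name = title[19:]
print(name)  # Google Drive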
@@ -0,0 +1,196 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the chrome extension analysis plugin."""
import os
import unittest
from plaso.analysis import chrome_extension
from plaso.analysis import test_lib
from plaso.engine import queue
from plaso.engine import single_process
from plaso.lib import event
# We are accessing quite a lot of protected members in this test file.
# Suppressing that message test file wide.
# pylint: disable=protected-access
class AnalyzeChromeExtensionTestPlugin(
chrome_extension.AnalyzeChromeExtensionPlugin):
"""Chrome extension analysis plugin used for testing."""
NAME = 'chrome_extension_test'
_TEST_DATA_PATH = os.path.join(
os.getcwd(), u'test_data', u'chrome_extensions')
def _GetChromeWebStorePage(self, extension_id):
"""Retrieves the page for the extension from the Chrome store test data.
Args:
extension_id: string containing the extension identifier.
"""
chrome_web_store_file = os.path.join(self._TEST_DATA_PATH, extension_id)
if not os.path.exists(chrome_web_store_file):
return
return open(chrome_web_store_file, 'rb')
class ChromeExtensionTest(test_lib.AnalysisPluginTestCase):
"""Tests for the chrome extension analysis plugin."""
# Few config options here.
MAC_PATHS = [
'/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions',
('/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions/'
'apdfllckaahabafndbhieahigkjlhalf'),
'/private/var/log/system.log',
'/Users/frank/Library/Application Data/Google/Chrome/Default',
'/Users/hans/Library/Application Data/Google/Chrome/Default',
('/Users/frank/Library/Application Data/Google/Chrome/Default/'
'Extensions/pjkljhegncpnkpknbcohdijeoejaedia'),
'/Users/frank/Library/Application Data/Google/Chrome/Default/Extensions',]
WIN_PATHS = [
'C:\\Users\\Dude\\SomeFolder\\Chrome\\Default\\Extensions',
('C:\\Users\\Dude\\SomeNoneStandardFolder\\Chrome\\Default\\Extensions\\'
'hmjkmjkepdijhoojdojkdfohbdgmmhki'),
('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\'
'blpcfgokakmgnkcojhhkbfbldkacnbeo'),
'\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions',
('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\'
'icppfcnhkcmnfdhfhphakoifcfokfdhg'),
'C:\\Windows\\System32',
'\\Stuff/with path separator\\Folder']
MAC_USERS = [
{u'name': u'root', u'path': u'/var/root', u'sid': u'0'},
{u'name': u'frank', u'path': u'/Users/frank', u'sid': u'4052'},
{u'name': u'hans', u'path': u'/Users/hans', u'sid': u'4352'},
{u'name': u'dude', u'path': u'/Users/dude', u'sid': u'1123'}]
WIN_USERS = [
{u'name': u'dude', u'path': u'C:\\Users\\dude', u'sid': u'S-1'},
{u'name': u'frank', u'path': u'C:\\Users\\frank', u'sid': u'S-2'}]
def _CreateTestEventObject(self, path):
"""Create a test event object with a particular path."""
event_object = event.EventObject()
event_object.data_type = 'fs:stat'
event_object.timestamp = 12345
event_object.timestamp_desc = u'Some stuff'
event_object.filename = path
return event_object
def testMacAnalyzerPlugin(self):
"""Test the plugin against mock events."""
knowledge_base = self._SetUpKnowledgeBase(knowledge_base_values={
'users': self.MAC_USERS})
event_queue = single_process.SingleProcessQueue()
# Fill the incoming queue with events.
test_queue_producer = queue.ItemQueueProducer(event_queue)
test_queue_producer.ProduceItems([
self._CreateTestEventObject(path) for path in self.MAC_PATHS])
test_queue_producer.SignalEndOfInput()
# Initialize plugin.
analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)
# Run the analysis plugin.
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
self.assertEquals(len(analysis_reports), 1)
analysis_report = analysis_reports[0]
self.assertEquals(analysis_plugin._sep, u'/')
# Due to the behavior of the join one additional empty string at the end
# is needed to create the last empty line.
expected_text = u'\n'.join([
u' == USER: dude ==',
u' Google Drive [apdfllckaahabafndbhieahigkjlhalf]',
u'',
u' == USER: frank ==',
u' Gmail [pjkljhegncpnkpknbcohdijeoejaedia]',
u'',
u''])
self.assertEquals(analysis_report.text, expected_text)
self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test')
expected_keys = set([u'frank', u'dude'])
self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys)
def testWinAnalyzePlugin(self):
"""Test the plugin against mock events."""
knowledge_base = self._SetUpKnowledgeBase(knowledge_base_values={
'users': self.WIN_USERS})
event_queue = single_process.SingleProcessQueue()
# Fill the incoming queue with events.
test_queue_producer = queue.ItemQueueProducer(event_queue)
test_queue_producer.ProduceItems([
self._CreateTestEventObject(path) for path in self.WIN_PATHS])
test_queue_producer.SignalEndOfInput()
# Initialize plugin.
analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)
# Run the analysis plugin.
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
self.assertEquals(len(analysis_reports), 1)
analysis_report = analysis_reports[0]
self.assertEquals(analysis_plugin._sep, u'\\')
# Due to the behavior of the join one additional empty string at the end
# is needed to create the last empty line.
expected_text = u'\n'.join([
u' == USER: dude ==',
u' Google Keep - notes and lists [hmjkmjkepdijhoojdojkdfohbdgmmhki]',
u'',
u' == USER: frank ==',
u' Google Play Music [icppfcnhkcmnfdhfhphakoifcfokfdhg]',
u' YouTube [blpcfgokakmgnkcojhhkbfbldkacnbeo]',
u'',
u''])
self.assertEquals(analysis_report.text, expected_text)
self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test')
expected_keys = set([u'frank', u'dude'])
self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys)
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,168 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The analysis context object."""
class AnalysisContext(object):
"""Class that implements the analysis context."""
def __init__(self, analysis_report_queue_producer, knowledge_base):
"""Initializes a analysis plugin context object.
Args:
analysis_report_queue_producer: the analysis report queue producer
(instance of ItemQueueProducer).
knowledge_base: A knowledge base object (instance of KnowledgeBase),
which contains information from the source data needed
for analysis.
"""
super(AnalysisContext, self).__init__()
self._analysis_report_queue_producer = analysis_report_queue_producer
self._knowledge_base = knowledge_base
self.number_of_produced_analysis_reports = 0
@property
def users(self):
"""The list of users."""
return self._knowledge_base.users
def GetPathSegmentSeparator(self, path):
"""Given a path give back the path separator as a best guess.
Args:
path: the path.
Returns:
The path segment separator.
"""
if path.startswith(u'\\') or path[1:].startswith(u':\\'):
return u'\\'
if path.startswith(u'/'):
return u'/'
if u'/' in path and u'\\' in path:
# Let's count slashes and guess which one is the right one.
forward_count = len(path.split(u'/'))
backward_count = len(path.split(u'\\'))
if forward_count > backward_count:
return u'/'
else:
return u'\\'
# Now we are sure there is only one type of separator, yet
# the path does not start with one.
if u'/' in path:
return u'/'
else:
return u'\\'
def GetUsernameFromPath(self, user_paths, file_path, path_segment_separator):
"""Return a username based on preprocessing and the path.
During preprocessing the tool will gather file paths to where each user
profile is stored, and which user it belongs to. This function takes in
a path to a file and compares it to a list of all discovered usernames
and paths to their profiles in the system. If it finds that the file path
belongs to a user profile it will return the username that the profile
belongs to.
Args:
user_paths: A dictionary object containing the paths per username.
file_path: The full path to the file being analyzed.
path_segment_separator: String containing the path segment separator.
Returns:
If possible the responsible username behind the file. Otherwise None.
"""
if not user_paths:
return
if path_segment_separator != u'/':
use_path = file_path.replace(path_segment_separator, u'/')
else:
use_path = file_path
if use_path[1:].startswith(u':/'):
use_path = use_path[2:]
use_path = use_path.lower()
for user, path in user_paths.iteritems():
if use_path.startswith(path):
return user
def GetUserPaths(self, users):
"""Retrieves the user paths.
Args:
users: a list of users.
Returns:
A dictionary object containing the paths per username or None if no users.
"""
if not users:
return
user_paths = {}
user_separator = None
for user in users:
name = user.get('name')
path = user.get('path')
if not path or not name:
continue
if not user_separator:
user_separator = self.GetPathSegmentSeparator(path)
if user_separator != u'/':
path = path.replace(user_separator, u'/').replace(u'//', u'/')
if path[1:].startswith(u':/'):
path = path[2:]
name = name.lower()
user_paths[name] = path.lower()
return user_paths
def ProcessAnalysisReport(self, analysis_report, plugin_name=None):
"""Processes an analysis report before it is emitted to the queue.
Args:
analysis_report: the analysis report object (instance of AnalysisReport).
plugin_name: Optional name of the plugin. The default is None.
"""
if not getattr(analysis_report, 'plugin_name', None) and plugin_name:
analysis_report.plugin_name = plugin_name
def ProduceAnalysisReport(self, analysis_report, plugin_name=None):
"""Produces an analysis report onto the queue.
Args:
analysis_report: the analysis report object (instance of AnalysisReport).
plugin_name: Optional name of the plugin. The default is None.
"""
self.ProcessAnalysisReport(analysis_report, plugin_name=plugin_name)
self._analysis_report_queue_producer.ProduceItem(analysis_report)
self.number_of_produced_analysis_reports += 1
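A minimal sketch of the path helpers on AnalysisContext; the two constructor arguments are passed as None placeholders here because these particular methods never touch them:

from plaso.analysis import context

ctx = context.AnalysisContext(None, None)  # placeholders, see note above

print(ctx.GetPathSegmentSeparator(u'C:\\Users\\dude'))  # \
print(ctx.GetPathSegmentSeparator(u'/Users/dude'))      # /

users = [{u'name': u'dude', u'path': u'/Users/dude', u'sid': u'1123'}]
user_paths = ctx.GetUserPaths(users)  # {u'dude': u'/users/dude'}
print(ctx.GetUsernameFromPath(user_paths, u'/Users/dude/Library', u'/'))  # dude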
@@ -0,0 +1,134 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the analysis context."""
import unittest
from plaso.analysis import context
from plaso.analysis import test_lib
from plaso.engine import queue
from plaso.engine import single_process
class AnalysisContextTest(test_lib.AnalysisPluginTestCase):
"""Tests for the analysis context."""
MAC_PATHS = [
'/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions',
('/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions/'
'apdfllckaahabafndbhieahigkjlhalf'),
'/private/var/log/system.log',
'/Users/frank/Library/Application Data/Google/Chrome/Default',
'/Users/hans/Library/Application Data/Google/Chrome/Default',
('/Users/frank/Library/Application Data/Google/Chrome/Default/'
'Extensions/pjkljhegncpnkpknbcohdijeoejaedia'),
'/Users/frank/Library/Application Data/Google/Chrome/Default/Extensions',]
WIN_PATHS = [
'C:\\Users\\Dude\\SomeFolder\\Chrome\\Default\\Extensions',
('C:\\Users\\Dude\\SomeNoneStandardFolder\\Chrome\\Default\\Extensions\\'
'hmjkmjkepdijhoojdojkdfohbdgmmhki'),
('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\'
'blpcfgokakmgnkcojhhkbfbldkacnbeo'),
'\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions',
('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\'
'icppfcnhkcmnfdhfhphakoifcfokfdhg'),
'C:\\Windows\\System32',
'\\Stuff/with path separator\\Folder']
MAC_USERS = [
{u'name': u'root', u'path': u'/var/root', u'sid': u'0'},
{u'name': u'frank', u'path': u'/Users/frank', u'sid': u'4052'},
{u'name': u'hans', u'path': u'/Users/hans', u'sid': u'4352'},
{u'name': u'dude', u'path': u'/Users/dude', u'sid': u'1123'}]
WIN_USERS = [
{u'name': u'dude', u'path': u'C:\\Users\\dude', u'sid': u'S-1'},
{u'name': u'frank', u'path': u'C:\\Users\\frank', u'sid': u'S-2'}]
def setUp(self):
"""Sets up the needed objects used throughout the test."""
knowledge_base = self._SetUpKnowledgeBase()
analysis_report_queue = single_process.SingleProcessQueue()
analysis_report_queue_producer = queue.ItemQueueProducer(
analysis_report_queue)
self._analysis_context = context.AnalysisContext(
analysis_report_queue_producer, knowledge_base)
def testGetPathSegmentSeparator(self):
"""Tests the GetPathSegmentSeparator function."""
for path in self.MAC_PATHS:
path_segment_separator = self._analysis_context.GetPathSegmentSeparator(
path)
self.assertEquals(path_segment_separator, u'/')
for path in self.WIN_PATHS:
path_segment_separator = self._analysis_context.GetPathSegmentSeparator(
path)
self.assertEquals(path_segment_separator, u'\\')
def testGetUserPaths(self):
"""Tests the GetUserPaths function."""
user_paths = self._analysis_context.GetUserPaths(self.MAC_USERS)
self.assertEquals(
set(user_paths.keys()), set([u'frank', u'dude', u'hans', u'root']))
self.assertEquals(user_paths[u'frank'], u'/users/frank')
self.assertEquals(user_paths[u'dude'], u'/users/dude')
self.assertEquals(user_paths[u'hans'], u'/users/hans')
self.assertEquals(user_paths[u'root'], u'/var/root')
user_paths = self._analysis_context.GetUserPaths(self.WIN_USERS)
self.assertEquals(set(user_paths.keys()), set([u'frank', u'dude']))
self.assertEquals(user_paths[u'frank'], u'/users/frank')
self.assertEquals(user_paths[u'dude'], u'/users/dude')
def testGetUsernameFromPath(self):
"""Tests the GetUsernameFromPath function."""
user_paths = self._analysis_context.GetUserPaths(self.MAC_USERS)
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.MAC_PATHS[0], u'/')
self.assertEquals(username, u'dude')
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.MAC_PATHS[4], u'/')
self.assertEquals(username, u'hans')
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.WIN_PATHS[0], u'/')
self.assertEquals(username, None)
user_paths = self._analysis_context.GetUserPaths(self.WIN_USERS)
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.WIN_PATHS[0], u'\\')
self.assertEquals(username, u'dude')
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.WIN_PATHS[2], u'\\')
self.assertEquals(username, u'frank')
username = self._analysis_context.GetUsernameFromPath(
user_paths, self.MAC_PATHS[2], u'\\')
self.assertEquals(username, None)
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,139 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains basic interface for analysis plugins."""
import abc
from plaso.engine import queue
from plaso.lib import registry
from plaso.lib import timelib
class AnalysisPlugin(queue.EventObjectQueueConsumer):
"""Analysis plugin gets a copy of each read event for analysis."""
__metaclass__ = registry.MetaclassRegistry
__abstract = True
# The URLS should contain a list of URLs with additional information about
# this analysis plugin.
URLS = []
# The name of the plugin. This is the name that is matched against when
# loading plugins, so it is important that this name is short, concise and
# explains the nature of the plugin easily. It also needs to be unique.
NAME = 'Plugin'
# A flag indicating whether this plugin should be run during the extraction
# phase or be reserved entirely for the post-processing stage.
# Typically this would mean that the plugin is perhaps too computationally
# heavy to be run during event extraction and should rather be run during
# post-processing.
# Since most plugins should perhaps rather be run during post-processing
# this is set to False by default and needs to be overwritten if the plugin
# should be able to run during the extraction phase.
ENABLE_IN_EXTRACTION = False
# All the possible report types.
TYPE_ANOMALY = 1 # Plugin that is inspecting events for anomalies.
TYPE_STATISTICS = 2 # Statistical calculations.
TYPE_ANNOTATION = 3 # Inspecting events with the primary purpose of
# annotating or tagging them.
TYPE_REPORT = 4 # Inspecting events to provide summary information.
# Optional arguments to be added to the argument parser.
# An example would be:
# ARGUMENTS = [('--myparameter', {
# 'action': 'store',
# 'help': 'This is my parameter help',
# 'dest': 'myparameter',
# 'default': '',
# 'type': 'unicode'})]
#
# Where all arguments into the dict object have a direct translation
# into the argparse parser.
ARGUMENTS = []
# We need to implement the interface for analysis plugins, but we don't use
# command line options here, so disable checking for unused args.
# pylint: disable=unused-argument
def __init__(self, incoming_queue, options=None):
"""Initializes an analysis plugin.
Args:
incoming_queue: A queue that is used to listen to incoming events.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
"""
super(AnalysisPlugin, self).__init__(incoming_queue)
self.plugin_type = self.TYPE_REPORT
# pylint: enable=unused-argument
def _ConsumeEventObject(self, event_object, analysis_context=None, **kwargs):
"""Consumes an event object callback for ConsumeEventObjects.
Args:
event_object: An event object (instance of EventObject).
analysis_context: Optional analysis context object (instance of
AnalysisContext). The default is None.
"""
self.ExamineEvent(analysis_context, event_object, **kwargs)
@property
def plugin_name(self):
"""Return the name of the plugin."""
return self.NAME
@abc.abstractmethod
def CompileReport(self):
"""Compiles a report of the analysis.
After the plugin has received every copy of an event to
analyze this function will be called so that the report
can be assembled.
Returns:
The analysis report (instance of AnalysisReport).
"""
@abc.abstractmethod
def ExamineEvent(self, analysis_context, event_object, **kwargs):
"""Analyzes an event object.
Args:
analysis_context: An analysis context object (instance of
AnalysisContext).
event_object: An event object (instance of EventObject).
"""
def RunPlugin(self, analysis_context):
"""For each item in the queue send the read event to analysis.
Args:
analysis_context: An analysis context object (instance of
AnalysisContext).
"""
self.ConsumeEventObjects(analysis_context=analysis_context)
analysis_report = self.CompileReport()
if analysis_report:
# TODO: move this into the plugins?
analysis_report.time_compiled = timelib.Timestamp.GetNow()
analysis_context.ProduceAnalysisReport(
analysis_report, plugin_name=self.plugin_name)
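A minimal sketch of a custom plugin on top of this interface; CountEventsPlugin is hypothetical and only fills in the two abstract methods, following the pattern of the plugins earlier in this commit:

from plaso.analysis import interface
from plaso.lib import event

class CountEventsPlugin(interface.AnalysisPlugin):
  """Counts every event it receives (illustrative only)."""

  NAME = 'count_events'

  def __init__(self, incoming_queue, options=None):
    super(CountEventsPlugin, self).__init__(incoming_queue)
    self._count = 0

  def ExamineEvent(self, analysis_context, event_object, **kwargs):
    self._count += 1

  def CompileReport(self):
    report = event.AnalysisReport()
    # The trailing empty string yields a final empty line, as in the
    # other plugins in this commit.
    report.SetText([u'{0:d} events examined'.format(self._count), u''])
    return report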
@@ -0,0 +1,171 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Analysis plugin related functions and classes for testing."""
import os
import unittest
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.analysis import context
from plaso.artifacts import knowledge_base
from plaso.engine import queue
from plaso.engine import single_process
from plaso.lib import event
from plaso.parsers import context as parsers_context
class TestAnalysisReportQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements a test analysis report queue consumer."""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(TestAnalysisReportQueueConsumer, self).__init__(queue_object)
self.analysis_reports = []
def _ConsumeItem(self, analysis_report):
"""Consumes an item callback for ConsumeItems.
Args:
analysis_report: the analysis report (instance of AnalysisReport).
"""
self.analysis_reports.append(analysis_report)
@property
def number_of_analysis_reports(self):
"""The number of analysis reports."""
return len(self.analysis_reports)
class AnalysisPluginTestCase(unittest.TestCase):
"""The unit test case for an analysis plugin."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data')
# Show full diff results, part of TestCase so does not follow our naming
# conventions.
maxDiff = None
def _GetAnalysisReportsFromQueue(self, analysis_report_queue_consumer):
"""Retrieves the analysis reports from the queue consumer.
Args:
analysis_report_queue_consumer: the analysis report queue consumer
object (instance of
TestAnalysisReportQueueConsumer).
Returns:
A list of analysis reports (instances of AnalysisReport).
"""
analysis_report_queue_consumer.ConsumeItems()
analysis_reports = []
for analysis_report in analysis_report_queue_consumer.analysis_reports:
self.assertIsInstance(analysis_report, event.AnalysisReport)
analysis_reports.append(analysis_report)
return analysis_reports
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
A path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
def _ParseFile(self, parser_object, path, knowledge_base_object):
"""Parses a file using the parser object.
Args:
parser_object: the parser object.
path: the path of the file to parse.
knowledge_base_object: the knowledge base object (instance of
KnowledgeBase).
Returns:
An event object queue object (instance of Queue).
"""
event_queue = single_process.SingleProcessQueue()
event_queue_producer = queue.ItemQueueProducer(event_queue)
parse_error_queue = single_process.SingleProcessQueue()
parser_context = parsers_context.ParserContext(
event_queue_producer, parse_error_queue, knowledge_base_object)
path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=path)
file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
parser_object.Parse(parser_context, file_entry)
event_queue.SignalEndOfInput()
return event_queue
def _RunAnalysisPlugin(self, analysis_plugin, knowledge_base_object):
"""Analyzes an event object queue using the plugin object.
Args:
analysis_plugin: the analysis plugin object (instance of AnalysisPlugin).
knowledge_base_object: the knowledge base object (instance of
KnowledgeBase).
Returns:
The analysis report queue consumer (instance of
TestAnalysisReportQueueConsumer).
"""
analysis_report_queue = single_process.SingleProcessQueue()
analysis_report_queue_consumer = TestAnalysisReportQueueConsumer(
analysis_report_queue)
analysis_report_queue_producer = queue.ItemQueueProducer(
analysis_report_queue)
analysis_context = context.AnalysisContext(
analysis_report_queue_producer, knowledge_base_object)
analysis_plugin.RunPlugin(analysis_context)
analysis_report_queue.SignalEndOfInput()
return analysis_report_queue_consumer
def _SetUpKnowledgeBase(self, knowledge_base_values=None):
"""Sets up a knowledge base.
Args:
knowledge_base_values: optional dict containing the knowledge base
values. The default is None.
Returns:
A knowledge base object (instance of KnowledgeBase).
"""
knowledge_base_object = knowledge_base.KnowledgeBase()
if knowledge_base_values:
for identifier, value in knowledge_base_values.iteritems():
knowledge_base_object.SetValue(identifier, value)
return knowledge_base_object
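Condensed, the harness above runs a plugin in five steps; a comment sketch for orientation:

# 1. kb = self._SetUpKnowledgeBase()                  # KnowledgeBase, optional values
# 2. event_queue = self._ParseFile(parser, path, kb)  # queue of event objects
# 3. plugin = SomeAnalysisPlugin(event_queue)
# 4. consumer = self._RunAnalysisPlugin(plugin, kb)   # runs plugin, returns consumer
# 5. reports = self._GetAnalysisReportsFromQueue(consumer)  # [AnalysisReport, ...]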
@@ -0,0 +1,267 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A plugin to enable quick triage of Windows Services."""
from plaso.analysis import interface
from plaso.lib import event
from plaso.winnt import human_readable_service_enums
# Moving this import to the bottom due to complaints from certain versions of
# linters.
import yaml
class WindowsService(yaml.YAMLObject):
"""Class to represent a Windows Service."""
# This is used for comparison operations and defines attributes that should
# not be used during evaluation of whether two services are the same.
COMPARE_EXCLUDE = frozenset(['sources'])
KEY_PATH_SEPARATOR = u'\\'
# YAML attributes
yaml_tag = u'!WindowsService'
yaml_loader = yaml.SafeLoader
yaml_dumper = yaml.SafeDumper
def __init__(self, name, service_type, image_path, start_type, object_name,
source, service_dll=None):
"""Initializes a new Windows service object.
Args:
name: The name of the service.
service_type: The value of the Type value of the service key.
image_path: The value of the ImagePath value of the service key.
start_type: The value of the Start value of the service key.
object_name: The value of the ObjectName value of the service key.
source: A tuple of (pathspec, Registry key) describing where the
service was found.
service_dll: Optional string value of the ServiceDll value in the
service's Parameters subkey. The default is None.
Raises:
TypeError: If a tuple with two elements is not passed as the 'source'
argument.
"""
self.name = name
self.service_type = service_type
self.image_path = image_path
self.start_type = start_type
self.service_dll = service_dll
self.object_name = object_name
if isinstance(source, tuple):
if len(source) != 2:
raise TypeError(u'Source argument must be a tuple of length 2.')
# A service may be found in multiple Control Sets or Registry hives,
# hence the list.
self.sources = [source]
else:
raise TypeError(u'Source argument must be a tuple.')
self.anomalies = []
@classmethod
def FromEvent(cls, service_event):
"""Creates a Service object from an plaso event.
Args:
service_event: The event object (instance of EventObject) to create a new
Service object from.
"""
_, _, name = service_event.keyname.rpartition(
WindowsService.KEY_PATH_SEPARATOR)
service_type = service_event.regvalue.get('Type')
image_path = service_event.regvalue.get('ImagePath')
start_type = service_event.regvalue.get('Start')
service_dll = service_event.regvalue.get('ServiceDll', u'')
object_name = service_event.regvalue.get('ObjectName', u'')
if service_event.pathspec:
source = (service_event.pathspec.location, service_event.keyname)
else:
source = (u'Unknown', u'Unknown')
return cls(
name=name, service_type=service_type, image_path=image_path,
start_type=start_type, object_name=object_name,
source=source, service_dll=service_dll)
def HumanReadableType(self):
"""Return a human readable string describing the type value."""
return human_readable_service_enums.SERVICE_ENUMS['Type'].get(
self.service_type, u'{0:d}'.format(self.service_type))
def HumanReadableStartType(self):
"""Return a human readable string describing the start_type value."""
return human_readable_service_enums.SERVICE_ENUMS['Start'].get(
self.start_type, u'{0:d}'.format(self.start_type))
def __eq__(self, other_service):
"""Custom equality method so that we match near-duplicates.
Compares two service objects together and evaluates if they are
the same or close enough to be considered to represent the same service.
For two service objects to be considered the same they need to
have the same set of attributes and the same values for all their
attributes, other than those enumerated as reserved in the
COMPARE_EXCLUDE constant.
Args:
other_service: The service (instance of WindowsService) we are testing
for equality.
Returns:
A boolean value to indicate whether the services are equal.
"""
if not isinstance(other_service, WindowsService):
return False
attributes = set(self.__dict__.keys())
other_attributes = set(other_service.__dict__.keys())
if attributes != other_attributes:
return False
# We compare the values for all attributes, other than those specifically
# enumerated as not relevant for equality comparisons.
for attribute in attributes.difference(self.COMPARE_EXCLUDE):
if getattr(self, attribute, None) != getattr(
other_service, attribute, None):
return False
return True
class WindowsServiceCollection(object):
"""Class to hold and de-duplicate Windows Services."""
def __init__(self):
"""Initialize a collection that holds Windows Service."""
self._services = []
def AddService(self, new_service):
"""Add a new service to the list of ones we know about.
Args:
new_service: The service (instance of WindowsService) to add.
"""
for service in self._services:
if new_service == service:
# If this service is the same as one we already know about, we
# just want to add where it came from.
service.sources.append(new_service.sources[0])
return
# We only add a new object to our list if we don't have
# an identical one already.
self._services.append(new_service)
@property
def services(self):
"""Get the services in this collection."""
return self._services
class AnalyzeWindowsServicesPlugin(interface.AnalysisPlugin):
"""Provides a single list of for Windows services found in the Registry."""
NAME = 'windows_services'
# Indicate that we can run this plugin during regular extraction.
ENABLE_IN_EXTRACTION = True
ARGUMENTS = [
('--windows-services-output', {
'dest': 'windows-services-output',
'type': unicode,
'help': 'Specify how the results should be displayed. Options are '
'text and yaml.',
'action': 'store',
'default': u'text',
'choices': [u'text', u'yaml']}),]
def __init__(self, incoming_queue, options=None):
"""Initializes the Windows Services plugin
Args:
incoming_queue: A queue to read events from.
options: Optional command line arguments (instance of
argparse.Namespace). The default is None.
"""
super(AnalyzeWindowsServicesPlugin, self).__init__(incoming_queue)
self._service_collection = WindowsServiceCollection()
self.plugin_type = interface.AnalysisPlugin.TYPE_REPORT
self._output_mode = getattr(options, 'windows-services-output', u'text')
def ExamineEvent(self, analysis_context, event_object, **kwargs):
"""Analyzes an event_object and creates Windows Services as required.
At present, this method only handles events extracted from the Registry.
Args:
analysis_context: The context object analysis plugins.
event_object: The event object (instance of EventObject) to examine.
"""
# TODO: Handle event log entries here also (ie, event id 4697).
if getattr(event_object, 'data_type', None) != 'windows:registry:service':
return
# Create and store the service.
service = WindowsService.FromEvent(event_object)
self._service_collection.AddService(service)
def _FormatServiceText(self, service):
"""Produces a human readable multi-line string representing the service.
Args:
service: The service (instance of WindowsService) to format.
"""
string_segments = [
service.name,
u'\tImage Path = {0:s}'.format(service.image_path),
u'\tService Type = {0:s}'.format(service.HumanReadableType()),
u'\tStart Type = {0:s}'.format(service.HumanReadableStartType()),
u'\tService Dll = {0:s}'.format(service.service_dll),
u'\tObject Name = {0:s}'.format(service.object_name),
u'\tSources:']
for source in service.sources:
string_segments.append(u'\t\t{0:s}:{1:s}'.format(source[0], source[1]))
return u'\n'.join(string_segments)
def CompileReport(self):
"""Compiles a report of the analysis.
Returns:
The analysis report (instance of AnalysisReport).
"""
report = event.AnalysisReport()
if self._output_mode == 'yaml':
lines_of_text = []
lines_of_text.append(
yaml.safe_dump_all(self._service_collection.services))
else:
lines_of_text = [u'Listing Windows Services']
for service in self._service_collection.services:
lines_of_text.append(self._FormatServiceText(service))
# Separate services with a blank line.
lines_of_text.append(u'')
report.SetText(lines_of_text)
return report
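A sketch of the de-duplication semantics above: two WindowsService objects that differ only in their source merge into a single entry whose sources list grows (the values mirror the test data that follows):

from plaso.analysis.windows_services import (
    WindowsService, WindowsServiceCollection)

kwargs = dict(
    name=u'TestbDriver', service_type=2,
    image_path=u'C:\\Dell\\testdriver.sys', start_type=2, object_name=u'')
service_a = WindowsService(
    source=(u'SYSTEM', u'\\ControlSet001\\services\\TestbDriver'), **kwargs)
service_b = WindowsService(
    source=(u'SYSTEM', u'\\ControlSet003\\services\\TestbDriver'), **kwargs)

collection = WindowsServiceCollection()
collection.AddService(service_a)
collection.AddService(service_b)

print(len(collection.services))             # 1 (de-duplicated)
print(len(collection.services[0].sources))  # 2 (both Control Sets recorded)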
@@ -0,0 +1,192 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the windows services analysis plugin."""
import argparse
import unittest
from dfvfs.path import fake_path_spec
from plaso.analysis import test_lib
from plaso.analysis import windows_services
from plaso.engine import queue
from plaso.engine import single_process
from plaso.events import windows_events
from plaso.parsers import winreg
class WindowsServicesTest(test_lib.AnalysisPluginTestCase):
"""Tests for the Windows Services analysis plugin."""
SERVICE_EVENTS = [
{u'path': u'\\ControlSet001\\services\\TestbDriver',
u'text_dict': {u'ImagePath': u'C:\\Dell\\testdriver.sys', u'Type': 2,
u'Start': 2, u'ObjectName': u''},
u'timestamp': 1346145829002031},
# This is almost the same, but different timestamp and source, so that
# we can test the service de-duplication.
{u'path': u'\\ControlSet003\\services\\TestbDriver',
u'text_dict': {u'ImagePath': u'C:\\Dell\\testdriver.sys', u'Type': 2,
u'Start': 2, u'ObjectName': u''},
u'timestamp': 1346145839002031},
]
def _CreateAnalysisPlugin(self, input_queue, output_mode):
"""Create an analysis plugin to test with.
Args:
input_queue: A queue the plugin will read events from.
output_mode: The output format the plugin will use.
Valid options are 'text' and 'yaml'.
Returns:
An instance of AnalyzeWindowsServicesPlugin.
"""
argument_parser = argparse.ArgumentParser()
plugin_args = windows_services.AnalyzeWindowsServicesPlugin.ARGUMENTS
for parameter, config in plugin_args:
argument_parser.add_argument(parameter, **config)
arguments = ['--windows-services-output', output_mode]
options = argument_parser.parse_args(arguments)
analysis_plugin = windows_services.AnalyzeWindowsServicesPlugin(
input_queue, options)
return analysis_plugin
def _CreateTestEventObject(self, service_event):
"""Create a test event object with a particular path.
Args:
service_event: A dict containing attributes of an event to add to the
queue.
Returns:
An EventObject representing the service to be created.
"""
test_pathspec = fake_path_spec.FakePathSpec(
location=u'C:\\WINDOWS\\system32\\SYSTEM')
event_object = windows_events.WindowsRegistryServiceEvent(
service_event[u'timestamp'], service_event[u'path'],
service_event[u'text_dict'])
event_object.pathspec = test_pathspec
return event_object
def testSyntheticKeysText(self):
"""Test the plugin against mock events."""
event_queue = single_process.SingleProcessQueue()
# Fill the incoming queue with events.
test_queue_producer = queue.ItemQueueProducer(event_queue)
events = [self._CreateTestEventObject(service_event)
for service_event
in self.SERVICE_EVENTS]
test_queue_producer.ProduceItems(events)
test_queue_producer.SignalEndOfInput()
# Initialize plugin.
analysis_plugin = self._CreateAnalysisPlugin(event_queue, u'text')
# Run the analysis plugin.
knowledge_base = self._SetUpKnowledgeBase()
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
self.assertEquals(len(analysis_reports), 1)
analysis_report = analysis_reports[0]
expected_text = (
u'Listing Windows Services\n'
u'TestbDriver\n'
u'\tImage Path = C:\\Dell\\testdriver.sys\n'
u'\tService Type = File System Driver (0x2)\n'
u'\tStart Type = Auto Start (2)\n'
u'\tService Dll = \n'
u'\tObject Name = \n'
u'\tSources:\n'
u'\t\tC:\\WINDOWS\\system32\\SYSTEM:'
u'\\ControlSet001\\services\\TestbDriver\n'
u'\t\tC:\\WINDOWS\\system32\\SYSTEM:'
u'\\ControlSet003\\services\\TestbDriver\n\n')
self.assertEquals(expected_text, analysis_report.text)
self.assertEquals(analysis_report.plugin_name, 'windows_services')
def testRealEvents(self):
"""Test the plugin with text output against real events from the parser."""
parser = winreg.WinRegistryParser()
# We could remove the non-Services plugins, but testing shows that the
# performance gain is negligible.
knowledge_base = self._SetUpKnowledgeBase()
test_path = self._GetTestFilePath(['SYSTEM'])
event_queue = self._ParseFile(parser, test_path, knowledge_base)
# Run the analysis plugin.
analysis_plugin = self._CreateAnalysisPlugin(event_queue, u'text')
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
report = analysis_reports[0]
text = report.text
# We'll check that a few strings are in the report, like they're supposed
# to be, rather than checking for the exact content of the string,
# as that's dependent on the full path to the test files.
test_strings = [u'1394ohci', u'WwanSvc', u'Sources:', u'ControlSet001',
u'ControlSet002']
for string in test_strings:
self.assertTrue(string in text)
def testRealEventsYAML(self):
"""Test the plugin with YAML output against real events from the parser."""
parser = winreg.WinRegistryParser()
# We could remove the non-Services plugins, but testing shows that the
# performance gain is negligible.
knowledge_base = self._SetUpKnowledgeBase()
test_path = self._GetTestFilePath(['SYSTEM'])
event_queue = self._ParseFile(parser, test_path, knowledge_base)
# Run the analysis plugin.
analysis_plugin = self._CreateAnalysisPlugin(event_queue, 'yaml')
analysis_report_queue_consumer = self._RunAnalysisPlugin(
analysis_plugin, knowledge_base)
analysis_reports = self._GetAnalysisReportsFromQueue(
analysis_report_queue_consumer)
report = analysis_reports[0]
text = report.text
# We'll check that a few strings are in the report, like they're supposed
# to be, rather than checking for the exact content of the string,
# as that's dependent on the full path to the test files.
test_strings = [windows_services.WindowsService.yaml_tag, u'1394ohci',
u'WwanSvc', u'ControlSet001', u'ControlSet002']
for string in test_strings:
self.assertTrue(string in text, u'{0:s} not found in report text'.format(
string))
if __name__ == '__main__':
unittest.main()
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+137
View File
@@ -0,0 +1,137 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The artifact knowledge base object.
The knowledge base is filled by user provided input and the pre-processing
phase. It is intended to provide successive phases, like the parsing and
analysis phases, with essential information like e.g. the timezone and
codepage of the source data.
"""
import pytz

from plaso.lib import event
class KnowledgeBase(object):
"""Class that implements the artifact knowledge base."""
def __init__(self):
"""Initialize the knowledge base object."""
super(KnowledgeBase, self).__init__()
# TODO: the first versions of the knowledge base will wrap the pre-process
# object, but this should be replaced by an artifact style knowledge base
# or artifact cache.
self._pre_obj = event.PreprocessObject()
self._default_codepage = u'cp1252'
self._default_timezone = pytz.timezone('UTC')
@property
def pre_obj(self):
"""The pre-process object."""
return self._pre_obj
@property
def codepage(self):
"""The codepage."""
return getattr(self._pre_obj, 'codepage', self._default_codepage)
@property
def hostname(self):
"""The hostname."""
return getattr(self._pre_obj, 'hostname', u'')
@property
def platform(self):
"""The platform."""
return getattr(self._pre_obj, 'guessed_os', u'')
@platform.setter
  def platform(self, value):
    """Sets the platform."""
    setattr(self._pre_obj, 'guessed_os', value)
@property
def timezone(self):
"""The timezone object."""
return getattr(self._pre_obj, 'zone', self._default_timezone)
@property
def users(self):
"""The list of users."""
return getattr(self._pre_obj, 'users', [])
@property
def year(self):
"""The year."""
return getattr(self._pre_obj, 'year', 0)
def GetUsernameByIdentifier(self, identifier):
"""Retrieves the username based on an identifier.
Args:
identifier: the identifier, either a UID or SID.
Returns:
      The username or '-' if not available.
"""
if not identifier:
return u'-'
return self._pre_obj.GetUsernameById(identifier)
def GetValue(self, identifier, default_value=None):
"""Retrieves a value by identifier.
Args:
identifier: the value identifier.
default_value: optional default value. The default is None.
Returns:
      The value or the default value if not available.
"""
return getattr(self._pre_obj, identifier, default_value)
def SetDefaultCodepage(self, codepage):
"""Sets the default codepage.
Args:
codepage: the default codepage.
"""
# TODO: check if value is sane.
self._default_codepage = codepage
def SetDefaultTimezone(self, timezone):
"""Sets the default timezone.
Args:
timezone: the default timezone.
"""
# TODO: check if value is sane.
self._default_timezone = timezone
def SetValue(self, identifier, value):
"""Sets a value by identifier.
Args:
identifier: the value identifier.
value: the value.
"""
setattr(self._pre_obj, identifier, value)
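# A minimal usage sketch of the knowledge base, assuming this module is
# importable as plaso.engine.knowledge_base; the values set here are purely
# illustrative.
from plaso.engine import knowledge_base

kb = knowledge_base.KnowledgeBase()
print kb.codepage                    # u'cp1252' until pre-processing sets one.
kb.SetValue(u'codepage', u'cp850')
print kb.GetValue(u'codepage')       # u'cp850'
print kb.GetValue(u'bogus', default_value=u'fallback')   # u'fallback'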
+16
View File
@@ -0,0 +1,16 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+184
View File
@@ -0,0 +1,184 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the format classifier classes.
Plaso is a tool that extracts events from files on a file system.
For this it either reads files from a mounted file system or from an image.
It uses an exhaustive approach to determine which parser can extract events
from a file, meaning that it passes the file first to parser A and if that
fails it continues with parser B.
The classifier is designed to more quickly determine the format of a file and
thereby limit the number of parsers needed in the exhaustive approach.
The current version of the classifier uses signatures to identify file formats.
Some signatures must always be found at a specific offset; these are referred
to as offset-bound signatures, or bound for short. Other signatures are
commonly found at a specific offset, but not necessarily. The last form of
signature is unbound, meaning that it has no fixed or common location where
it can be found.
A specification is a collection of signatures with additional metadata that
defines a specific file format. These specifications are grouped into a store
for ease of use, e.g. so that they can be read from a configuration file all
at once.
The classifier requires a scanner to analyze the data in a file. The scanner
uses the specifications in a store to scan for the signatures of a certain
format.
The classifier allows for multiple methods of scanning a file:
* full: the entire file is scanned. This is the default scanning method.
 * head-tail: only the beginning (head) and the end (tail) of the file are
   scanned. This approach is more efficient for larger files.
The buffer size is used as the size of the data that is scanned.
Smaller files are scanned entirely.
The classifier returns zero or more classifications which point to a format
specification and the scan results for the signatures defined by
the specification.
"""
import logging
class Classification(object):
"""This class represents a format classification.
The format classification consists of a format specification and
scan results.
"""
def __init__(self, specification, scan_matches):
"""Initializes the classification.
Args:
specification: the format specification (instance of Specification).
scan_matches: the list of scan matches (instances of _ScanMatch).
"""
self._specification = specification
self.scan_matches = scan_matches
@property
def identifier(self):
"""The classification type."""
return self._specification.identifier
@property
def magic_types(self):
"""The magic types or an empty list if none."""
return self._specification.magic_types
@property
def mime_types(self):
"""The mime type or an empty list if none."""
return self._specification.mime_types
class Classifier(object):
"""Class for classifying formats in raw data.
The classifier is initialized with one or more specifications.
After which it can be used to classify data in files or file-like objects.
The actual scanning of the data is done by the scanner, these are separate
to allow for the scanner to easily be replaced for a more efficient
alternative if necessary.
For an example of how the classifier is to be used see: classify.py.
"""
BUFFER_SIZE = 16 * 1024 * 1024
def __init__(self, scanner):
"""Initializes the classifier and sets up the scanning related structures.
Args:
scanner: an instance of the signature scanner.
"""
self._scanner = scanner
def _GetClassifications(self, scan_results):
"""Retrieves the classifications based on the scan results.
Multiple scan results are combined into a single classification.
Args:
scan_results: a list containing instances of _ScanResult.
Returns:
a list of instances of Classification.
"""
classifications = {}
for scan_result in scan_results:
for scan_match in scan_result.scan_matches:
logging.debug(
u'scan match at offset: 0x{0:08x} specification: {1:s}'.format(
scan_match.total_data_offset, scan_result.identifier))
if scan_result.identifier not in classifications:
classifications[scan_result.identifier] = Classification(
scan_result.specification, scan_result.scan_matches)
return classifications.values()
def ClassifyBuffer(self, data, data_size):
"""Classifies the data in a buffer, assumes all necessary data is available.
Args:
data: a buffer containing raw data.
data_size: the size of the raw data in the buffer.
Returns:
a list of classifications or an empty list.
"""
scan_state = self._scanner.StartScan()
self._scanner.ScanBuffer(scan_state, data, data_size)
self._scanner.StopScan(scan_state)
return self._GetClassifications(scan_state.GetResults())
def ClassifyFileObject(self, file_object):
"""Classifies the data in a file-like object.
Args:
file_object: a file-like object.
Returns:
a list of classifier classifications or an empty list.
"""
scan_results = self._scanner.ScanFileObject(file_object)
return self._GetClassifications(scan_results)
def ClassifyFile(self, filename):
"""Classifies the data in a file.
Args:
filename: the name of the file.
Returns:
a list of classifier classifications or an empty list.
"""
classifications = []
with open(filename, 'rb') as file_object:
classifications = self.ClassifyFileObject(file_object)
return classifications
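# A minimal sketch of classifying an in-memory buffer, assuming the
# test_lib.CreateSpecificationStore helper used by the tests in this import;
# whether the hypothetical buffer below matches depends on the specifications
# in that store.
from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib

test_scanner = scanner.Scanner(test_lib.CreateSpecificationStore())
test_classifier = classifier.Classifier(test_scanner)

data = 'regf' + '\x00' * 28
for classification in test_classifier.ClassifyBuffer(data, len(data)):
  print classification.identifier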
+72
View File
@@ -0,0 +1,72 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the format classifier classes."""
import os
import unittest
from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib
class ClassifierTest(unittest.TestCase):
"""Class to test Classifier."""
def setUp(self):
"""Function to test the initialize function."""
self._store = test_lib.CreateSpecificationStore()
self._test_file1 = os.path.join('test_data', 'NTUSER.DAT')
self._test_file2 = os.path.join('test_data', 'syslog.zip')
def testClassifyFileWithScanner(self):
"""Function to test the classify file function."""
test_scanner = scanner.Scanner(self._store)
test_classifier = classifier.Classifier(test_scanner)
classifications = test_classifier.ClassifyFile(self._test_file1)
self.assertEqual(len(classifications), 1)
# TODO: assert the contents of the classification.
test_classifier = classifier.Classifier(test_scanner)
classifications = test_classifier.ClassifyFile(self._test_file2)
self.assertEqual(len(classifications), 1)
# TODO: assert the contents of the classification.
def testClassifyFileWithOffsetBoundScanner(self):
"""Function to test the classify file function."""
test_scanner = scanner.OffsetBoundScanner(self._store)
test_classifier = classifier.Classifier(test_scanner)
classifications = test_classifier.ClassifyFile(self._test_file1)
self.assertEqual(len(classifications), 1)
# TODO: assert the contents of the classification.
test_classifier = classifier.Classifier(test_scanner)
classifications = test_classifier.ClassifyFile(self._test_file2)
self.assertEqual(len(classifications), 1)
# TODO: assert the contents of the classification.
if __name__ == "__main__":
unittest.main()
+78
View File
@@ -0,0 +1,78 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a small classify test program."""
import argparse
import glob
import logging
from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib
def Main():
args_parser = argparse.ArgumentParser(
description='Classify test program.')
  # Note: argparse, unlike optparse, has no type='choice'; the choices
  # argument alone restricts the accepted values.
  args_parser.add_argument(
      '-t', '--type', metavar='TYPE', action='store', dest='scanner_type',
      choices=['scan-tree', 'scan_tree'], default='scan-tree',
      help='The scanner type')
args_parser.add_argument(
'-v', '--verbose', action='store_true', dest='verbose', default=False,
help='Print verbose output')
args_parser.add_argument(
'filenames', nargs='+', action='store', metavar='FILENAMES',
default=None, help='The input filename(s) to classify.')
options = args_parser.parse_args()
if options.verbose:
logging.basicConfig(level=logging.DEBUG)
files_to_classify = []
for input_glob in options.filenames:
files_to_classify += glob.glob(input_glob)
store = test_lib.CreateSpecificationStore()
if options.scanner_type not in ['scan-tree', 'scan_tree']:
    print u'Unsupported scanner type, defaulting to: scan-tree.'
scan = scanner.Scanner(store)
classify = classifier.Classifier(scan)
for input_filename in files_to_classify:
classifications = classify.ClassifyFile(input_filename)
print u'File: {0:s}'.format(input_filename)
if not classifications:
print u'No classifications found.'
else:
print u'Classifications:'
for classification in classifications:
print u'\tformat: {0:s}'.format(classification.identifier)
print u''
if __name__ == '__main__':
Main()
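# Example invocations of this test program (hypothetical file names):
#   python classify.py test_data/NTUSER.DAT
#   python classify.py --verbose 'test_data/*.zip'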
+308
View File
@@ -0,0 +1,308 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The patterns classes used by the scan tree-based format scanner."""
class _ByteValuePatterns(object):
"""Class that implements a mapping between byte value and patterns.
The byte value patterns are used in the scan tree-based format scanner
to map a byte value to one or more patterns.
"""
def __init__(self, byte_value):
"""Initializes the pattern table (entry) byte value.
Args:
byte_value: the byte value that maps the patterns in the table.
"""
super(_ByteValuePatterns, self).__init__()
self.byte_value = byte_value
self.patterns = {}
def __unicode__(self):
"""Retrieves a string representation of the byte value patterns."""
return u'0x{0:02x} {1!s}'.format(ord(self.byte_value), self.patterns)
def AddPattern(self, pattern):
"""Adds a pattern.
Args:
pattern: the pattern (instance of Pattern).
Raises:
ValueError: if the table entry already contains a pattern
with the same identifier.
"""
if pattern.identifier in self.patterns:
raise ValueError(u'Pattern {0:s} is already defined.'.format(
pattern.identifier))
self.patterns[pattern.identifier] = pattern
def ToDebugString(self, indentation_level=1):
"""Converts the byte value pattern into a debug string."""
indentation = u' ' * indentation_level
header = u'{0:s}byte value: 0x{1:02x}\n'.format(
indentation, ord(self.byte_value))
entries = u''.join([u'{0:s} patterns: {1:s}\n'.format(
indentation, identifier) for identifier in self.patterns])
return u''.join([header, entries, u'\n'])
class _SkipTable(object):
"""Class that implements a skip table.
The skip table is used in the scan tree-based format scanner to determine
the skip value for the BoyerMooreHorspool search.
"""
def __init__(self, skip_pattern_length):
"""Initializes the skip table.
Args:
skip_pattern_length: the (maximum) skip pattern length.
"""
super(_SkipTable, self).__init__()
self._skip_value_per_byte_value = {}
self.skip_pattern_length = skip_pattern_length
def __getitem__(self, key):
"""Retrieves a specific skip value.
Args:
key: the byte value within the skip table.
Returns:
      the skip value for the key or the maximum skip value
if no corresponding key was found.
"""
if key in self._skip_value_per_byte_value:
return self._skip_value_per_byte_value[key]
return self.skip_pattern_length
def SetSkipValue(self, byte_value, skip_value):
"""Sets a skip value.
Args:
byte_value: the corresponding byte value.
skip_value: the number of bytes to skip.
Raises:
ValueError: if byte value or skip value is out of bounds.
"""
if byte_value < 0 or byte_value > 255:
raise ValueError(u'Invalid byte value, value out of bounds.')
if skip_value < 0 or skip_value >= self.skip_pattern_length:
raise ValueError(u'Invalid skip value, value out of bounds.')
if (not byte_value in self._skip_value_per_byte_value or
self._skip_value_per_byte_value[byte_value] > skip_value):
self._skip_value_per_byte_value[byte_value] = skip_value
def ToDebugString(self):
"""Converts the skip table into a debug string."""
header = u'Byte value\tSkip value\n'
entries = u''.join([u'0x{0:02x}\t{1:d}\n'.format(
byte_value, self._skip_value_per_byte_value[byte_value])
for byte_value in self._skip_value_per_byte_value])
default = u'Default\t{0:d}\n'.format(self.skip_pattern_length)
return u''.join([header, entries, default, u'\n'])
class Pattern(object):
"""Class that implements a pattern."""
def __init__(self, signature_index, signature, specification):
"""Initializes the pattern.
Args:
signature_index: the index of the signature within the specification.
signature: the signature (instance of Signature).
specification: the specification (instance of Specification) that
contains the signature.
"""
super(Pattern, self).__init__()
self._signature_index = signature_index
self.signature = signature
self.specification = specification
def __unicode__(self):
"""Retrieves a string representation."""
return self.identifier
@property
def expression(self):
"""The signature expression."""
return self.signature.expression
@property
def identifier(self):
"""The identifier."""
    # Using _ here because some scanner implementations are limited in what
    # characters can be used in the identifiers.
return u'{0:s}_{1:d}'.format(
self.specification.identifier, self._signature_index)
@property
def offset(self):
"""The signature offset."""
return self.signature.offset
@property
def is_bound(self):
"""Boolean value to indicate the signature is bound to an offset."""
return self.signature.is_bound
class PatternTable(object):
"""Class that implements a pattern table.
  The pattern table is used in the scan tree-based format scanner
to construct a scan tree. It contains either unbound patterns or
patterns bound to a specific offset.
"""
def __init__(self, patterns, ignore_list, is_bound=None):
"""Initializes and builds the patterns table from patterns.
Args:
patterns: a list of the patterns.
ignore_list: a list of pattern offsets to ignore.
is_bound: optional boolean value to indicate if the signatures are bound
to offsets. The default is None, which means the value should
be ignored and both bound and unbound patterns are considered
unbound.
Raises:
ValueError: if a signature pattern is too small to be useful (< 4).
"""
super(PatternTable, self).__init__()
self._byte_values_per_offset = {}
self.largest_pattern_length = 0
self.largest_pattern_offset = 0
self.patterns = []
self.smallest_pattern_length = 0
self.smallest_pattern_offset = 0
for pattern in patterns:
if is_bound is not None and pattern.signature.is_bound != is_bound:
continue
pattern_length = len(pattern.expression)
if pattern_length < 4:
raise ValueError(u'Pattern too small to be useful.')
      # The smallest pattern length starts at 0, which would win every min()
      # comparison, so treat 0 as "not yet set".
      if not self.smallest_pattern_length:
        self.smallest_pattern_length = pattern_length
      else:
        self.smallest_pattern_length = min(
            self.smallest_pattern_length, pattern_length)
self.largest_pattern_length = max(
self.largest_pattern_length, pattern_length)
self.patterns.append(pattern)
self._AddPattern(pattern, ignore_list, is_bound)
def _AddPattern(self, pattern, ignore_list, is_bound):
"""Adds the byte values per offset in the pattern to the table.
Args:
pattern: the pattern (instance of Pattern).
ignore_list: a list of pattern offsets to ignore.
is_bound: boolean value to indicate if the signatures are bound
to offsets. A value of None indicates that the value should
be ignored and both bound and unbound patterns are considered
unbound.
"""
pattern_offset = pattern.offset if is_bound else 0
self.smallest_pattern_offset = min(
self.smallest_pattern_offset, pattern_offset)
self.largest_pattern_offset = max(
self.largest_pattern_offset, pattern_offset)
for byte_value in pattern.expression:
if pattern_offset not in self._byte_values_per_offset:
self._byte_values_per_offset[pattern_offset] = {}
if pattern_offset not in ignore_list:
byte_values = self._byte_values_per_offset[pattern_offset]
if byte_value not in byte_values:
byte_values[byte_value] = _ByteValuePatterns(byte_value)
byte_value_patterns = byte_values[byte_value]
byte_value_patterns.AddPattern(pattern)
pattern_offset += 1
@property
def offsets(self):
"""The offsets."""
return self._byte_values_per_offset.keys()
def GetByteValues(self, pattern_offset):
"""Returns the bytes values for a specific pattern offset."""
return self._byte_values_per_offset[pattern_offset]
def GetSkipTable(self):
"""Retrieves the skip table for the patterns in the table.
Returns:
The skip table (instance of SkipTable).
"""
skip_table = _SkipTable(self.smallest_pattern_length)
for pattern in self.patterns:
if pattern.expression:
skip_value = self.smallest_pattern_length
for expression_index in range(0, self.smallest_pattern_length):
skip_value -= 1
skip_table.SetSkipValue(
ord(pattern.expression[expression_index]), skip_value)
return skip_table
def ToDebugString(self):
"""Converts the pattern table into a debug string."""
header = u'Pattern offset\tByte value(s)\n'
entries = u''
for pattern_offset in self._byte_values_per_offset:
entries += u'{0:d}'.format(pattern_offset)
byte_values = self._byte_values_per_offset[pattern_offset]
for byte_value in byte_values:
identifiers = u', '.join(
[identifier for identifier in byte_values[byte_value].patterns])
entries += u'\t0x{0:02x} ({1:s})'.format(ord(byte_value), identifiers)
entries += u'\n'
return u''.join([header, entries, u'\n'])
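# A minimal sketch of building a pattern table and its Boyer-Moore-Horspool
# skip table, wiring up Specification and Signature the same way the scan
# tree tests in this import do.
from plaso.classifier import patterns
from plaso.classifier import specification

regf_specification = specification.Specification('REGF')
regf_specification.AddNewSignature('regf', offset=0)
regf_signature = specification.Signature('regf', offset=0)
regf_pattern = patterns.Pattern(0, regf_signature, regf_specification)

pattern_table = patterns.PatternTable([regf_pattern], [])
print pattern_table.ToDebugString()
print pattern_table.GetSkipTable().ToDebugString()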
+156
View File
@@ -0,0 +1,156 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The range list data type."""
class Range(object):
"""Class that implements a range object."""
def __init__(self, range_offset, range_size):
"""Initializes the range object.
Args:
range_offset: the range offset.
range_size: the range size.
Raises:
ValueError: if the range offset or range size is not valid.
"""
if range_offset < 0:
raise ValueError(u'Invalid range offset value.')
if range_size < 0:
raise ValueError(u'Invalid range size value.')
super(Range, self).__init__()
self.start_offset = range_offset
self.size = range_size
self.end_offset = range_offset + range_size
class RangeList(object):
"""Class that implements a range list object."""
def __init__(self):
"""Initializes the range list object."""
super(RangeList, self).__init__()
self.ranges = []
@property
def number_of_ranges(self):
"""The number of ranges."""
return len(self.ranges)
def GetSpanningRange(self):
"""Retrieves the range spanning the entire range list."""
if self.number_of_ranges == 0:
return
first_range = self.ranges[0]
last_range = self.ranges[-1]
range_size = last_range.end_offset - first_range.start_offset
return Range(first_range.start_offset, range_size)
def Insert(self, range_offset, range_size):
"""Inserts the range defined by the offset and size in the list.
Note that overlapping ranges will be merged.
Args:
range_offset: the range offset.
range_size: the range size.
Raises:
RuntimeError: if the range cannot be inserted.
ValueError: if the range offset or range size is not valid.
"""
if range_offset < 0:
raise ValueError(u'Invalid range offset value.')
if range_size < 0:
raise ValueError(u'Invalid range size value.')
insert_index = None
merge_index = None
number_of_range_objects = len(self.ranges)
range_end_offset = range_offset + range_size
if number_of_range_objects == 0:
insert_index = 0
else:
range_object_index = 0
for range_object in self.ranges:
# Ignore negative ranges.
if range_object.start_offset < 0:
range_object_index += 1
continue
# Insert the range before an existing one.
if range_end_offset < range_object.start_offset:
insert_index = range_object_index
break
# Ignore the range since the existing one overlaps it.
if (range_offset >= range_object.start_offset and
range_end_offset <= range_object.end_offset):
break
# Merge the range since it overlaps the existing one at the end.
if (range_offset >= range_object.start_offset and
range_offset <= range_object.end_offset):
merge_index = range_object_index
break
# Merge the range since it overlaps the existing one at the start.
if (range_end_offset >= range_object.start_offset and
range_end_offset <= range_object.end_offset):
merge_index = range_object_index
break
# Merge the range since it overlaps the existing one.
if (range_offset <= range_object.start_offset and
range_end_offset >= range_object.end_offset):
merge_index = range_object_index
break
range_object_index += 1
# Insert the range after the last one.
if range_object_index >= number_of_range_objects:
insert_index = number_of_range_objects
if insert_index is not None and merge_index is not None:
raise RuntimeError(
u'Unable to insert the range both insert and merge specified.')
if insert_index is not None:
self.ranges.insert(insert_index, Range(range_offset, range_size))
elif merge_index is not None:
range_object = self.ranges[merge_index]
if range_offset < range_object.start_offset:
range_object.size += range_object.start_offset - range_offset
range_object.start_offset = range_offset
if range_end_offset > range_object.end_offset:
range_object.size += range_end_offset - range_object.end_offset
range_object.end_offset = range_end_offset
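# A minimal sketch of the insert and merge behavior, mirroring the range list
# tests further below; the offsets and sizes are purely illustrative.
from plaso.classifier import range_list

ranges = range_list.RangeList()
ranges.Insert(500, 100)
# Overlaps the end of the range 500-600, so the two are merged into 500-650.
ranges.Insert(550, 100)
# Does not overlap, so a second range is inserted.
ranges.Insert(2000, 100)

spanning_range = ranges.GetSpanningRange()
print ranges.number_of_ranges                    # 2
print spanning_range.start_offset, spanning_range.size   # 500 1600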
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the range list."""
import unittest
from plaso.classifier import range_list
class RangeListTest(unittest.TestCase):
"""Class to test the range list."""
def testInsertPositiveRanges(self):
"""Function to test the insert function using positive ranges."""
range_list_object = range_list.RangeList()
# Test non-overlapping range.
range_list_object.Insert(500, 100)
self.assertEquals(range_list_object.number_of_ranges, 1)
range_object = range_list_object.ranges[0]
self.assertEquals(range_object.start_offset, 500)
self.assertEquals(range_object.end_offset, 600)
self.assertEquals(range_object.size, 100)
# Test non-overlapping range.
range_list_object.Insert(2000, 100)
self.assertEquals(range_list_object.number_of_ranges, 2)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 2000)
self.assertEquals(range_object.end_offset, 2100)
self.assertEquals(range_object.size, 100)
# Test range that overlaps with an existing range at the start.
range_list_object.Insert(1950, 100)
self.assertEquals(range_list_object.number_of_ranges, 2)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 1950)
self.assertEquals(range_object.end_offset, 2100)
self.assertEquals(range_object.size, 150)
# Test range that overlaps with an existing range at the end.
range_list_object.Insert(2050, 100)
self.assertEquals(range_list_object.number_of_ranges, 2)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 1950)
self.assertEquals(range_object.end_offset, 2150)
self.assertEquals(range_object.size, 200)
# Test non-overlapping range.
range_list_object.Insert(1000, 100)
self.assertEquals(range_list_object.number_of_ranges, 3)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 1000)
self.assertEquals(range_object.end_offset, 1100)
self.assertEquals(range_object.size, 100)
# Test range that aligns with an existing range at the end.
range_list_object.Insert(1100, 100)
self.assertEquals(range_list_object.number_of_ranges, 3)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 1000)
self.assertEquals(range_object.end_offset, 1200)
self.assertEquals(range_object.size, 200)
# Test range that aligns with an existing range at the start.
range_list_object.Insert(900, 100)
self.assertEquals(range_list_object.number_of_ranges, 3)
range_object = range_list_object.ranges[1]
self.assertEquals(range_object.start_offset, 900)
self.assertEquals(range_object.end_offset, 1200)
self.assertEquals(range_object.size, 300)
# Test non-overlapping range.
range_list_object.Insert(0, 100)
self.assertEquals(range_list_object.number_of_ranges, 4)
range_object = range_list_object.ranges[0]
self.assertEquals(range_object.start_offset, 0)
self.assertEquals(range_object.end_offset, 100)
self.assertEquals(range_object.size, 100)
# Test invalid ranges.
with self.assertRaises(ValueError):
range_list_object.Insert(-1, 100)
with self.assertRaises(ValueError):
range_list_object.Insert(3000, -100)
if __name__ == '__main__':
unittest.main()
+744
View File
@@ -0,0 +1,744 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The scan tree classes used by the scan tree-based format scanner."""
import logging
from plaso.classifier import patterns
from plaso.classifier import range_list
class _PatternWeights(object):
"""Class that implements pattern weights."""
def __init__(self):
"""Initializes the pattern weights."""
super(_PatternWeights, self).__init__()
self._offsets_per_weight = {}
self._weight_per_offset = {}
def AddOffset(self, pattern_offset):
"""Adds a pattern offset and sets its weight to 0.
Args:
pattern_offset: the pattern offset to add to the pattern weights.
Raises:
ValueError: if the pattern weights already contains the pattern offset.
"""
if pattern_offset in self._weight_per_offset:
raise ValueError(u'Pattern offset already set.')
self._weight_per_offset[pattern_offset] = 0
def AddWeight(self, pattern_offset, weight):
"""Adds a weight for a specific pattern offset.
Args:
pattern_offset: the pattern offset to add to the pattern weights.
weight: the corresponding weight to add.
Raises:
ValueError: if the pattern weights does not contain the pattern offset.
"""
if pattern_offset not in self._weight_per_offset:
raise ValueError(u'Pattern offset not set.')
self._weight_per_offset[pattern_offset] += weight
if weight not in self._offsets_per_weight:
self._offsets_per_weight[weight] = []
self._offsets_per_weight[weight].append(pattern_offset)
def GetLargestWeight(self):
"""Retrieves the largest weight or 0 if none."""
if self._offsets_per_weight:
return max(self._offsets_per_weight)
return 0
def GetOffsetsForWeight(self, weight):
"""Retrieves the list of offsets for a specific weight."""
return self._offsets_per_weight[weight]
def GetWeightForOffset(self, pattern_offset):
"""Retrieves the weight for a specific pattern offset."""
return self._weight_per_offset[pattern_offset]
def ToDebugString(self):
"""Converts the pattern weights into a debug string."""
header1 = u'Pattern offset\tWeight\n'
entries1 = u''.join([u'{0:d}\t{1:d}\n'.format(
pattern_offset, self._weight_per_offset[pattern_offset])
for pattern_offset in self._weight_per_offset])
header2 = u'Weight\tPattern offset(s)\n'
entries2 = u''.join([u'{0:d}\t{1!s}\n'.format(
weight, self._offsets_per_weight[weight])
for weight in self._offsets_per_weight])
return u''.join([header1, entries1, u'\n', header2, entries2, u'\n'])
def SetWeight(self, pattern_offset, weight):
"""Sets a weight for a specific pattern offset.
Args:
pattern_offset: the pattern offset to set in the pattern weights.
weight: the corresponding weight to set.
Raises:
ValueError: if the pattern weights does not contain the pattern offset.
"""
if pattern_offset not in self._weight_per_offset:
raise ValueError(u'Pattern offset not set.')
self._weight_per_offset[pattern_offset] = weight
if weight not in self._offsets_per_weight:
self._offsets_per_weight[weight] = []
self._offsets_per_weight[weight].append(pattern_offset)
class ScanTree(object):
"""Class that implements a scan tree."""
_COMMON_BYTE_VALUES = frozenset(
'\x00\x01\xff\t\n\r 0123456789'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz')
# The offset must be positive, negative offsets are ignored.
OFFSET_MODE_POSITIVE = 1
# The offset must be negative, positive offsets are ignored.
OFFSET_MODE_NEGATIVE = 2
# The offset must be positive, an error is raised for negative offsets.
OFFSET_MODE_POSITIVE_STRICT = 3
# The offset must be negative, an error is raised for positive offsets.
OFFSET_MODE_NEGATIVE_STRICT = 4
def __init__(
self, specification_store, is_bound,
offset_mode=OFFSET_MODE_POSITIVE_STRICT):
"""Initializes and builds the scan tree.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
is_bound: boolean value to indicate if the signatures are bound
to offsets. A value of None indicates that the value should
be ignored and both bound and unbound patterns are considered
unbound.
offset_mode: optional value to indicate how the signature offsets should
be handled. The default is that the offset must be positive
and an error is raised for negative offsets.
"""
super(ScanTree, self).__init__()
self.largest_length = 0
self.pattern_list = []
self.range_list = range_list.RangeList()
self.root_node = None
self.skip_table = None
# First determine all the patterns from the specification store.
self._BuildPatterns(specification_store, is_bound, offset_mode=offset_mode)
# Next create the scan tree starting with the root node.
ignore_list = []
pattern_table = patterns.PatternTable(
self.pattern_list, ignore_list, is_bound)
if pattern_table.patterns:
self.root_node = self._BuildScanTreeNode(
pattern_table, ignore_list, is_bound)
logging.debug(u'Scan tree:\n{0:s}'.format(
self.root_node.ToDebugString()))
# At the end the skip table is determined to provide for the
# BoyerMooreHorspool skip value.
self.skip_table = pattern_table.GetSkipTable()
logging.debug(u'Skip table:\n{0:s}'.format(
self.skip_table.ToDebugString()))
self.largest_length = pattern_table.largest_pattern_length
def _BuildPatterns(
self, specification_store, is_bound,
offset_mode=OFFSET_MODE_POSITIVE_STRICT):
"""Builds the list of patterns.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
is_bound: boolean value to indicate if the signatures are bound
to offsets. A value of None indicates that the value should
be ignored and both bound and unbound patterns are considered
unbound.
offset_mode: optional value to indicate how the signature offsets should
be handled. The default is that the offset must be positive
and an error is raised for negative offsets.
Raises:
ValueError: if a signature offset invalid according to specified offset
mode or a signature pattern is too small to be useful (< 4).
"""
self.pattern_list = []
for specification in specification_store.specifications:
signature_index = 0
for signature in specification.signatures:
if signature.expression:
signature_offset = signature.offset if is_bound else 0
signature_pattern_length = len(signature.expression)
# Make sure signature offset is numeric.
try:
signature_offset = int(signature_offset)
except (TypeError, ValueError):
signature_offset = 0
if signature_offset < 0:
if offset_mode == self.OFFSET_MODE_POSITIVE:
continue
elif offset_mode == self.OFFSET_MODE_POSITIVE_STRICT:
raise ValueError(u'Signature offset less than 0.')
# The range list does not allow offsets to be negative and thus
# the signature offset is turned into a positive equivalent.
signature_offset *= -1
            # The signature size is subtracted to make sure the spanning
# range will align with the original negative offset values.
signature_offset -= signature_pattern_length
elif signature_offset > 0:
if offset_mode == self.OFFSET_MODE_NEGATIVE:
continue
elif offset_mode == self.OFFSET_MODE_NEGATIVE_STRICT:
raise ValueError(u'Signature offset greater than 0.')
if signature_pattern_length < 4:
raise ValueError(u'Signature pattern smaller than 4.')
pattern = patterns.Pattern(
signature_index, signature, specification)
self.pattern_list.append(pattern)
self.range_list.Insert(signature_offset, signature_pattern_length)
signature_index += 1
def _BuildScanTreeNode(self, pattern_table, ignore_list, is_bound):
"""Builds a scan tree node.
Args:
pattern_table: a pattern table (instance of PatternTable).
ignore_list: a list of pattern offsets to ignore
is_bound: boolean value to indicate if the signatures are bound
to offsets. A value of None indicates that the value should
be ignored and both bound and unbound patterns are considered
unbound.
Raises:
ValueError: if number of byte value patterns value out of bounds.
Returns:
A scan tree node (instance of ScanTreeNode).
"""
# Make a copy of the lists because the function is going to alter them
# and the changes must remain in scope of the function.
pattern_list = list(pattern_table.patterns)
ignore_list = list(ignore_list)
similarity_weights = _PatternWeights()
occurrence_weights = _PatternWeights()
value_weights = _PatternWeights()
for pattern_offset in pattern_table.offsets:
similarity_weights.AddOffset(pattern_offset)
occurrence_weights.AddOffset(pattern_offset)
value_weights.AddOffset(pattern_offset)
byte_values = pattern_table.GetByteValues(pattern_offset)
number_of_byte_values = len(byte_values)
if number_of_byte_values > 1:
occurrence_weights.SetWeight(pattern_offset, number_of_byte_values)
for byte_value in byte_values:
byte_value_patterns = byte_values[byte_value]
byte_value_weight = len(byte_value_patterns.patterns)
if byte_value_weight > 1:
similarity_weights.AddWeight(pattern_offset, byte_value_weight)
        # Compare the byte value itself, not its weight, against the set of
        # common byte values.
        if byte_value not in self._COMMON_BYTE_VALUES:
value_weights.AddWeight(pattern_offset, 1)
logging.debug(u'Pattern table:\n{0:s}'.format(
pattern_table.ToDebugString()))
logging.debug(u'Similarity weights:\n{0:s}'.format(
similarity_weights.ToDebugString()))
logging.debug(u'Occurrence weights:\n{0:s}'.format(
occurrence_weights.ToDebugString()))
logging.debug(u'Value weights:\n{0:s}'.format(
value_weights.ToDebugString()))
pattern_offset = self._GetMostSignificantPatternOffset(
pattern_list, similarity_weights, occurrence_weights, value_weights)
ignore_list.append(pattern_offset)
# For the scan tree negative offsets are adjusted so that
# the smallest pattern offset is 0.
scan_tree_pattern_offset = pattern_offset
if scan_tree_pattern_offset < 0:
scan_tree_pattern_offset -= pattern_table.smallest_pattern_offset
scan_tree_node = ScanTreeNode(scan_tree_pattern_offset)
byte_values = pattern_table.GetByteValues(pattern_offset)
for byte_value in byte_values:
byte_value_patterns = byte_values[byte_value]
logging.debug(u'{0:s}'.format(byte_value_patterns.ToDebugString()))
number_of_byte_value_patterns = len(byte_value_patterns.patterns)
if number_of_byte_value_patterns <= 0:
raise ValueError(
u'Invalid number of byte value patterns value out of bounds.')
elif number_of_byte_value_patterns == 1:
for identifier in byte_value_patterns.patterns:
logging.debug(
u'Adding pattern: {0:s} for byte value: 0x{1:02x}.'.format(
identifier, ord(byte_value)))
scan_tree_node.AddByteValue(
byte_value, byte_value_patterns.patterns[identifier])
else:
pattern_table = patterns.PatternTable(
byte_value_patterns.patterns.itervalues(), ignore_list, is_bound)
scan_sub_node = self._BuildScanTreeNode(
pattern_table, ignore_list, is_bound)
logging.debug(
u'Adding scan node for byte value: 0x{0:02x}\n{1:s}'.format(
ord(byte_value), scan_sub_node.ToDebugString()))
scan_tree_node.AddByteValue(ord(byte_value), scan_sub_node)
for identifier in byte_value_patterns.patterns:
logging.debug(u'Removing pattern: {0:s} from:\n{1:s}'.format(
identifier, self._PatternsToDebugString(pattern_list)))
pattern_list.remove(byte_value_patterns.patterns[identifier])
logging.debug(u'Remaining patterns:\n{0:s}'.format(
self._PatternsToDebugString(pattern_list)))
number_of_patterns = len(pattern_list)
if number_of_patterns == 1:
logging.debug(u'Setting pattern: {0:s} for default value'.format(
pattern_list[0].identifier))
scan_tree_node.SetDefaultValue(pattern_list[0])
elif number_of_patterns > 1:
pattern_table = patterns.PatternTable(pattern_list, ignore_list, is_bound)
scan_sub_node = self._BuildScanTreeNode(
pattern_table, ignore_list, is_bound)
logging.debug(u'Setting scan node for default value:\n{0:s}'.format(
scan_sub_node.ToDebugString()))
scan_tree_node.SetDefaultValue(scan_sub_node)
return scan_tree_node
def _GetMostSignificantPatternOffset(
self, pattern_list, similarity_weights, occurrence_weights,
value_weights):
"""Returns the most significant pattern offset.
Args:
pattern_list: a list of patterns
similarity_weights: the similarity (pattern) weights.
occurrence_weights: the occurrence (pattern) weights.
value_weights: the value (pattern) weights.
Raises:
ValueError: when pattern is an empty list.
Returns:
a pattern offset.
"""
if not pattern_list:
raise ValueError(u'Missing pattern list.')
pattern_offset = None
number_of_patterns = len(pattern_list)
if number_of_patterns == 1:
pattern_offset = self._GetPatternOffsetForValueWeights(
value_weights)
elif number_of_patterns == 2:
pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
occurrence_weights, value_weights)
elif number_of_patterns > 2:
pattern_offset = self._GetPatternOffsetForSimilarityWeights(
similarity_weights, occurrence_weights, value_weights)
logging.debug(u'Largest weight offset: {0:d}'.format(pattern_offset))
return pattern_offset
def _GetPatternOffsetForOccurrenceWeights(
self, occurrence_weights, value_weights):
"""Returns the most significant pattern offset based on the value weights.
Args:
occurrence_weights: the occurrence (pattern) weights.
value_weights: the value (pattern) weights.
Returns:
a pattern offset.
"""
debug_string = ""
pattern_offset = None
largest_weight = occurrence_weights.GetLargestWeight()
logging.debug(u'Largest occurrence weight: {0:d}'.format(largest_weight))
if largest_weight > 0:
occurrence_weight_offsets = occurrence_weights.GetOffsetsForWeight(
largest_weight)
number_of_occurrence_offsets = len(occurrence_weight_offsets)
else:
number_of_occurrence_offsets = 0
if number_of_occurrence_offsets == 0:
pattern_offset = self._GetPatternOffsetForValueWeights(
value_weights)
elif number_of_occurrence_offsets == 1:
pattern_offset = occurrence_weight_offsets[0]
else:
largest_weight = 0
largest_value_weight = 0
for occurrence_offset in occurrence_weight_offsets:
value_weight = value_weights.GetWeightForOffset(
occurrence_offset)
debug_string = (
u'Occurrence offset: {0:d} value weight: {1:d}').format(
occurrence_offset, value_weight)
        if pattern_offset is None or largest_weight < value_weight:
largest_weight = value_weight
pattern_offset = occurrence_offset
debug_string += u' largest value weight: {0:d}'.format(
largest_value_weight)
logging.debug(u'{0:s}'.format(debug_string))
return pattern_offset
def _GetPatternOffsetForSimilarityWeights(
self, similarity_weights, occurrence_weights, value_weights):
"""Returns the most significant pattern offset.
Args:
similarity_weights: the similarity (pattern) weights.
occurrence_weights: the occurrence (pattern) weights.
value_weights: the value (pattern) weights.
Returns:
a pattern offset.
"""
debug_string = ""
pattern_offset = None
largest_weight = similarity_weights.GetLargestWeight()
logging.debug(u'Largest similarity weight: {0:d}'.format(largest_weight))
if largest_weight > 0:
similarity_weight_offsets = similarity_weights.GetOffsetsForWeight(
largest_weight)
number_of_similarity_offsets = len(similarity_weight_offsets)
else:
number_of_similarity_offsets = 0
if number_of_similarity_offsets == 0:
pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
occurrence_weights, value_weights)
elif number_of_similarity_offsets == 1:
pattern_offset = similarity_weight_offsets[0]
else:
largest_weight = 0
largest_value_weight = 0
for similarity_offset in similarity_weight_offsets:
occurrence_weight = occurrence_weights.GetWeightForOffset(
similarity_offset)
debug_string = (
u'Similarity offset: {0:d} occurrence weight: {1:d}').format(
similarity_offset, occurrence_weight)
if largest_weight > 0 and largest_weight == occurrence_weight:
value_weight = value_weights.GetWeightForOffset(
similarity_offset)
debug_string += u' value weight: {0:d}'.format(value_weight)
if largest_value_weight < value_weight:
largest_weight = 0
        if pattern_offset is None or largest_weight < occurrence_weight:
largest_weight = occurrence_weight
pattern_offset = similarity_offset
largest_value_weight = value_weights.GetWeightForOffset(
similarity_offset)
debug_string += u' largest value weight: {0:d}'.format(
largest_value_weight)
logging.debug(u'{0:s}'.format(debug_string))
return pattern_offset
def _GetPatternOffsetForValueWeights(
self, value_weights):
"""Returns the most significant pattern offset based on the value weights.
Args:
value_weights: the value (pattern) weights.
Raises:
      RuntimeError: if no value weight offsets were found.
Returns:
a pattern offset.
"""
largest_weight = value_weights.GetLargestWeight()
logging.debug(u'Largest value weight: {0:d}'.format(largest_weight))
if largest_weight > 0:
value_weight_offsets = value_weights.GetOffsetsForWeight(largest_weight)
number_of_value_offsets = len(value_weight_offsets)
else:
number_of_value_offsets = 0
if number_of_value_offsets == 0:
raise RuntimeError(u'No value weight offsets found.')
return value_weight_offsets[0]
def _PatternsToDebugString(self, pattern_list):
"""Converts the list of patterns into a debug string."""
entries = u', '.join([u'{0:s}'.format(pattern) for pattern in pattern_list])
return u''.join([u'[', entries, u']'])
class ScanTreeNode(object):
"""Class that implements a scan tree node."""
def __init__(self, pattern_offset):
"""Initializes the scan tree node.
Args:
pattern_offset: the offset in the pattern to which the node
applies.
"""
super(ScanTreeNode, self).__init__()
self._byte_values = {}
self.default_value = None
self.parent = None
self.pattern_offset = pattern_offset
def AddByteValue(self, byte_value, scan_object):
"""Adds a byte value.
Args:
byte_value: the corresponding byte value.
scan_object: the scan object, either a scan sub node or a pattern.
Raises:
ValueError: if byte value is out of bounds or if the node already
contains a scan object for the byte value.
"""
if isinstance(byte_value, str):
byte_value = ord(byte_value)
if byte_value < 0 or byte_value > 255:
raise ValueError(u'Invalid byte value, value out of bounds.')
if byte_value in self._byte_values:
raise ValueError(u'Byte value already set.')
if isinstance(scan_object, ScanTreeNode):
scan_object.parent = self
self._byte_values[byte_value] = scan_object
def CompareByteValue(
self, data, data_offset, data_size, total_data_offset,
total_data_size=None):
"""Scans a buffer using the bounded scan tree.
    This function will return partial matches on the data block boundary
    as long as the total data size has not been reached.
Args:
data: a buffer containing raw data.
data_offset: the offset in the raw data in the buffer.
data_size: the size of the raw data in the buffer.
total_data_offset: the offset of the data relative to the start of
the total data scanned.
total_data_size: optional value to indicate the total data size.
The default is None.
Returns:
the resulting scan object which is either a ScanTreeNode or Pattern
or None.
Raises:
RuntimeError: if the data offset, total data offset, total data size
or pattern offset value is out of bounds.
"""
found_match = False
scan_tree_byte_value = 0
if data_offset < 0 or data_offset >= data_size:
raise RuntimeError(u'Invalid data offset, value out of bounds.')
if total_data_size is not None and total_data_size < 0:
raise RuntimeError(u'Invalid total data size, value out of bounds.')
if total_data_offset < 0 or (
total_data_size is not None and total_data_offset >= total_data_size):
raise RuntimeError(u'Invalid total data offset, value out of bounds.')
if (total_data_size is not None and
total_data_offset + data_size >= total_data_size):
match_on_boundary = True
else:
match_on_boundary = False
data_offset += self.pattern_offset
if not match_on_boundary and data_offset >= data_size:
raise RuntimeError(u'Invalid pattern offset value, out of bounds.')
if data_offset < data_size:
data_byte_value = ord(data[data_offset])
for scan_tree_byte_value in self._byte_values:
if data_byte_value == scan_tree_byte_value:
found_match = True
break
if found_match:
scan_object = self._byte_values[scan_tree_byte_value]
      logging.debug(
          u'Scan tree node match at data offset: 0x{0:08x}.'.format(
              data_offset))
else:
scan_object = self.default_value
if not scan_object:
scan_object = self.parent
while scan_object and not scan_object.default_value:
scan_object = scan_object.parent
if scan_object:
scan_object = scan_object.default_value
return scan_object
def SetDefaultValue(self, scan_object):
"""Sets the default (non-match) value.
Args:
scan_object: the scan object, either a scan sub node or a pattern.
Raises:
ValueError: if the default value is already set.
"""
if self.default_value:
raise ValueError(u'Default value already set.')
self.default_value = scan_object
def ToDebugString(self, indentation_level=1):
"""Converts the scan tree node into a debug string."""
indentation = u' ' * indentation_level
header = u'{0:s}pattern offset: {1:d}\n'.format(
indentation, self.pattern_offset)
entries = u''
for byte_value in self._byte_values:
entries += u'{0:s}byte value: 0x{1:02x}\n'.format(indentation, byte_value)
if isinstance(self._byte_values[byte_value], ScanTreeNode):
entries += u'{0:s}scan tree node:\n'.format(indentation)
entries += self._byte_values[byte_value].ToDebugString(
indentation_level + 1)
elif isinstance(self._byte_values[byte_value], patterns.Pattern):
entries += u'{0:s}pattern: {1:s}\n'.format(
indentation, self._byte_values[byte_value].identifier)
default = u'{0:s}default value:\n'.format(indentation)
if isinstance(self.default_value, ScanTreeNode):
default += u'{0:s}scan tree node:\n'.format(indentation)
default += self.default_value.ToDebugString(indentation_level + 1)
elif isinstance(self.default_value, patterns.Pattern):
default += u'{0:s}pattern: {1:s}\n'.format(
indentation, self.default_value.identifier)
return u''.join([header, entries, default, u'\n'])
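As a hedged illustration of the byte-value dispatch above, the sketch below builds a two-level scan tree by hand and walks a buffer through it; the Pattern and Signature constructors follow the plaso.classifier interfaces shown elsewhere in this import, and the buffer contents are made up for the example.
# Hedged usage sketch, not part of the original module: manual construction
# of a minimal scan tree that matches 'regf' on its first two bytes.
from plaso.classifier import patterns
from plaso.classifier import scan_tree
from plaso.classifier import specification

format_regf = specification.Specification('REGF')
format_regf.AddNewSignature('regf', offset=0)
signature_regf = specification.Signature('regf', offset=0)
pattern_regf = patterns.Pattern(0, signature_regf, format_regf)
# The root node dispatches on byte 0 ('r'), the sub node on byte 1 ('e').
root_node = scan_tree.ScanTreeNode(0)
sub_node = scan_tree.ScanTreeNode(1)
sub_node.AddByteValue('e', pattern_regf)
root_node.AddByteValue('r', sub_node)
data = 'regf\x00\x00\x00\x00'
# The first comparison returns the sub node, the second the pattern.
scan_object = root_node.CompareByteValue(data, 0, len(data), 0)
scan_object = scan_object.CompareByteValue(data, 0, len(data), 0)
assert scan_object is pattern_regf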
+74
View File
@@ -0,0 +1,74 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the scan tree classes."""
import unittest
from plaso.classifier import patterns
from plaso.classifier import scan_tree
from plaso.classifier import specification
class ScanTreeNodeTest(unittest.TestCase):
"""Class to test the scan tree node."""
def testAddByteValueWithPattern(self):
"""Function to test the add byte value with pattern function."""
scan_node = scan_tree.ScanTreeNode(0)
format_regf = specification.Specification('REGF')
format_regf.AddNewSignature('regf', offset=0)
format_esedb = specification.Specification('ESEDB')
format_esedb.AddNewSignature('\xef\xcd\xab\x89', offset=4)
signature_esedb = specification.Signature('\xef\xcd\xab\x89', offset=4)
signature_regf = specification.Signature('regf', offset=0)
pattern_regf = patterns.Pattern(0, signature_regf, format_regf)
pattern_esedb = patterns.Pattern(0, signature_esedb, format_esedb)
scan_node.AddByteValue('r', pattern_regf)
scan_node.AddByteValue('\xef', pattern_esedb)
self.assertRaises(
ValueError, scan_node.AddByteValue, 'r', pattern_regf)
self.assertRaises(
ValueError, scan_node.AddByteValue, -1, pattern_regf)
self.assertRaises(
ValueError, scan_node.AddByteValue, 256, pattern_regf)
def testAddByteValueWithScanNode(self):
"""Function to test the add byte value with scan node function."""
scan_node = scan_tree.ScanTreeNode(0)
scan_sub_node_0x41 = scan_tree.ScanTreeNode(1)
scan_sub_node_0x80 = scan_tree.ScanTreeNode(1)
scan_node.AddByteValue(0x41, scan_sub_node_0x41)
scan_node.AddByteValue(0x80, scan_sub_node_0x80)
self.assertRaises(
ValueError, scan_node.AddByteValue, 0x80, scan_sub_node_0x80)
self.assertRaises(
ValueError, scan_node.AddByteValue, -1, scan_sub_node_0x80)
self.assertRaises(
ValueError, scan_node.AddByteValue, 256, scan_sub_node_0x80)
if __name__ == '__main__':
unittest.main()
+749
View File
@@ -0,0 +1,749 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the classes for a scan tree-based format scanner."""
import logging
import os
from plaso.classifier import patterns
from plaso.classifier import range_list
from plaso.classifier import scan_tree
class _ScanMatch(object):
"""Class that implements a scan match."""
def __init__(self, total_data_offset, pattern):
"""Initializes the scan result.
Args:
total_data_offset: the offset of the resulting match relative
to the start of the total data scanned.
pattern: the pattern matched.
"""
super(_ScanMatch, self).__init__()
self.total_data_offset = total_data_offset
self.pattern = pattern
@property
def specification(self):
"""The specification."""
return self.pattern.specification
class _ScanResult(object):
"""Class that implements a scan result."""
def __init__(self, specification):
"""Initializes the scan result.
Args:
      specification: the format specification (instance of Specification).
"""
super(_ScanResult, self).__init__()
self.specification = specification
self.scan_matches = []
@property
def identifier(self):
"""The specification identifier."""
return self.specification.identifier
class ScanState(object):
"""Class that implements a scan state."""
# The state definitions.
_SCAN_STATE_START = 1
_SCAN_STATE_SCANNING = 2
_SCAN_STATE_STOP = 3
def __init__(self, scan_tree_node, total_data_size=None):
"""Initializes the scan state.
Args:
scan_tree_node: the corresponding scan tree node or None.
total_data_size: optional value to indicate the total data size.
The default is None.
"""
super(ScanState, self).__init__()
self._matches = []
self.remaining_data = None
self.remaining_data_size = 0
self.scan_tree_node = scan_tree_node
self.state = self._SCAN_STATE_START
self.total_data_offset = 0
self.total_data_size = total_data_size
def AddMatch(self, total_data_offset, pattern):
"""Adds a result to the state to scanning.
Args:
total_data_offset: the offset of the resulting match relative
to the start total data scanned.
pattern: the pattern matched.
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if (self.state != self._SCAN_STATE_START and
self.state != self._SCAN_STATE_SCANNING):
raise RuntimeError(u'Unsupported scan state.')
self._matches.append(_ScanMatch(total_data_offset, pattern))
def GetMatches(self):
"""Retrieves a list containing the results.
Returns:
A list of scan matches (instances of _ScanMatch).
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if self.state != self._SCAN_STATE_STOP:
raise RuntimeError(u'Unsupported scan state.')
return self._matches
def Reset(self, scan_tree_node):
"""Resets the state to start.
This function will clear the remaining data.
Args:
scan_tree_node: the corresponding scan tree node or None.
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if self.state != self._SCAN_STATE_STOP:
raise RuntimeError(u'Unsupported scan state.')
self.remaining_data = None
self.remaining_data_size = 0
self.scan_tree_node = scan_tree_node
self.state = self._SCAN_STATE_START
def Scanning(self, scan_tree_node, total_data_offset):
"""Sets the state to scanning.
Args:
scan_tree_node: the active scan tree node.
total_data_offset: the offset of the resulting match relative
to the start of the total data scanned.
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if (self.state != self._SCAN_STATE_START and
self.state != self._SCAN_STATE_SCANNING):
raise RuntimeError(u'Unsupported scan state.')
self.scan_tree_node = scan_tree_node
self.state = self._SCAN_STATE_SCANNING
self.total_data_offset = total_data_offset
def Stop(self):
"""Sets the state to stop.
Raises:
      RuntimeError: when an unsupported state is encountered.
"""
if (self.state != self._SCAN_STATE_START and
self.state != self._SCAN_STATE_SCANNING):
raise RuntimeError(u'Unsupported scan state.')
self.scan_tree_node = None
self.state = self._SCAN_STATE_STOP
class ScanTreeScannerBase(object):
"""Class that implements a scan tree-based scanner base."""
def __init__(self, specification_store):
"""Initializes the scanner.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
"""
super(ScanTreeScannerBase, self).__init__()
self._scan_tree = None
self._specification_store = specification_store
def _ScanBufferScanState(
self, scan_tree_object, scan_state, data, data_size, total_data_offset,
total_data_size=None):
"""Scans a buffer using the scan tree.
    This function implements a Boyer-Moore-Horspool equivalent approach
in combination with the scan tree.
Args:
scan_tree_object: the scan tree (instance of ScanTree).
scan_state: the scan state (instance of ScanState).
data: a buffer containing raw data.
data_size: the size of the raw data in the buffer.
total_data_offset: the offset of the data relative to the start of
the total data scanned.
total_data_size: optional value to indicate the total data size.
The default is None.
Raises:
      RuntimeError: if the total data offset, total data size or the last
                    pattern offset value is out of bounds.
"""
if total_data_size is not None and total_data_size < 0:
raise RuntimeError(u'Invalid total data size, value out of bounds.')
if total_data_offset < 0 or (
total_data_size is not None and total_data_offset >= total_data_size):
raise RuntimeError(u'Invalid total data offset, value out of bounds.')
data_offset = 0
scan_tree_node = scan_state.scan_tree_node
if scan_state.remaining_data:
      # str.join() should be more efficient than concatenation by +.
data = ''.join([scan_state.remaining_data, data])
data_size += scan_state.remaining_data_size
scan_state.remaining_data = None
scan_state.remaining_data_size = 0
if (total_data_size is not None and
total_data_offset + data_size >= total_data_size):
match_on_boundary = True
else:
match_on_boundary = False
while data_offset < data_size:
if (not match_on_boundary and
data_offset + scan_tree_object.largest_length >= data_size):
break
found_match = False
scan_done = False
while not scan_done:
scan_object = scan_tree_node.CompareByteValue(
data, data_offset, data_size, total_data_offset,
total_data_size=total_data_size)
if isinstance(scan_object, scan_tree.ScanTreeNode):
scan_tree_node = scan_object
else:
scan_done = True
if isinstance(scan_object, patterns.Pattern):
pattern_length = len(scan_object.signature.expression)
data_last_offset = data_offset + pattern_length
        if (scan_object.signature.expression ==
            data[data_offset:data_last_offset]):
if (not scan_object.signature.is_bound or
scan_object.signature.offset == data_offset):
found_match = True
logging.debug(
u'Signature match at data offset: 0x{0:08x}.'.format(
data_offset))
scan_state.AddMatch(total_data_offset + data_offset, scan_object)
if found_match:
skip_value = len(scan_object.signature.expression)
scan_tree_node = scan_tree_object.root_node
else:
last_pattern_offset = (
scan_tree_object.skip_table.skip_pattern_length - 1)
if data_offset + last_pattern_offset >= data_size:
raise RuntimeError(
u'Invalid last pattern offset, value out of bounds.')
skip_value = 0
while last_pattern_offset >= 0 and not skip_value:
last_data_offset = data_offset + last_pattern_offset
byte_value = ord(data[last_data_offset])
skip_value = scan_tree_object.skip_table[byte_value]
last_pattern_offset -= 1
if not skip_value:
skip_value = 1
scan_tree_node = scan_tree_object.root_node
data_offset += skip_value
if not match_on_boundary and data_offset < data_size:
scan_state.remaining_data = data[data_offset:data_size]
scan_state.remaining_data_size = data_size - data_offset
scan_state.Scanning(scan_tree_node, total_data_offset + data_offset)
def _ScanBufferScanStateFinal(self, scan_tree_object, scan_state):
"""Scans the remaining data in the scan state using the scan tree.
Args:
scan_tree_object: the scan tree (instance of ScanTree).
scan_state: the scan state (instance of ScanState).
"""
if scan_state.remaining_data:
data = scan_state.remaining_data
data_size = scan_state.remaining_data_size
scan_state.remaining_data = None
scan_state.remaining_data_size = 0
      # Setting the total data size will make sure boundary matches are
      # returned in this scanning pass.
total_data_size = scan_state.total_data_size
if total_data_size is None:
total_data_size = scan_state.total_data_offset + data_size
self._ScanBufferScanState(
scan_tree_object, scan_state, data, data_size,
scan_state.total_data_offset, total_data_size=total_data_size)
scan_state.Stop()
def GetScanResults(self, scan_state):
"""Retrieves the scan results.
Args:
scan_state: the scan state (instance of ScanState).
    Returns:
A list of scan results (instances of _ScanResult).
"""
scan_results = {}
for scan_match in scan_state.GetMatches():
specification = scan_match.specification
identifier = specification.identifier
logging.debug(
u'Scan match at offset: 0x{0:08x} specification: {1:s}'.format(
scan_match.total_data_offset, identifier))
if identifier not in scan_results:
scan_results[identifier] = _ScanResult(specification)
scan_results[identifier].scan_matches.append(scan_match)
return scan_results.values()
class Scanner(ScanTreeScannerBase):
"""Class that implements a scan tree-based scanner."""
_READ_BUFFER_SIZE = 512
def __init__(self, specification_store):
"""Initializes the scanner.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
"""
super(Scanner, self).__init__(specification_store)
def ScanBuffer(self, scan_state, data, data_size):
"""Scans a buffer.
Args:
scan_state: the scan state (instance of ScanState).
data: a buffer containing raw data.
data_size: the size of the raw data in the buffer.
"""
self._ScanBufferScanState(
self._scan_tree, scan_state, data, data_size,
scan_state.total_data_offset,
total_data_size=scan_state.total_data_size)
def ScanFileObject(self, file_object):
"""Scans a file-like object.
Args:
file_object: a file-like object.
Returns:
      A list of scan results (instances of _ScanResult).
"""
file_offset = 0
if hasattr(file_object, 'get_size'):
file_size = file_object.get_size()
else:
file_object.seek(0, os.SEEK_END)
file_size = file_object.tell()
scan_state = self.StartScan(total_data_size=file_size)
file_object.seek(file_offset, os.SEEK_SET)
while file_offset < file_size:
data = file_object.read(self._READ_BUFFER_SIZE)
data_size = len(data)
if data_size == 0:
break
self._ScanBufferScanState(
self._scan_tree, scan_state, data, data_size, file_offset,
total_data_size=file_size)
file_offset += data_size
self.StopScan(scan_state)
return self.GetScanResults(scan_state)
def StartScan(self, total_data_size=None):
"""Starts a scan.
    The function sets up the scanning-related structures if necessary.
Args:
total_data_size: optional value to indicate the total data size.
The default is None.
Returns:
A scan state (instance of ScanState).
Raises:
RuntimeError: when total data size is invalid.
"""
if total_data_size is not None and total_data_size < 0:
raise RuntimeError(u'Invalid total data size.')
if self._scan_tree is None:
self._scan_tree = scan_tree.ScanTree(
self._specification_store, None)
return ScanState(self._scan_tree.root_node, total_data_size=total_data_size)
def StopScan(self, scan_state):
"""Stops a scan.
Args:
scan_state: the scan state (instance of ScanState).
"""
self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
class OffsetBoundScanner(ScanTreeScannerBase):
"""Class that implements an offset-bound scan tree-based scanner."""
_READ_BUFFER_SIZE = 512
def __init__(self, specification_store):
"""Initializes the scanner.
Args:
specification_store: the specification store (instance of
SpecificationStore) that contains the format
specifications.
"""
super(OffsetBoundScanner, self).__init__(specification_store)
self._footer_scan_tree = None
self._footer_spanning_range = None
self._header_scan_tree = None
self._header_spanning_range = None
def _GetFooterRange(self, total_data_size):
"""Retrieves the read buffer aligned footer range.
Args:
      total_data_size: the total data size.
Returns:
A range (instance of Range).
"""
# The actual footer range is in reverse since the spanning footer range
# is based on positive offsets, where 0 is the end of file.
if self._footer_spanning_range.end_offset < total_data_size:
footer_range_start_offset = (
total_data_size - self._footer_spanning_range.end_offset)
else:
footer_range_start_offset = 0
    # Round the footer range start offset down to a multiple of the read
    # buffer size.
footer_range_start_offset /= self._READ_BUFFER_SIZE
footer_range_start_offset *= self._READ_BUFFER_SIZE
    # Round the footer range size up to a multiple of the read buffer size.
footer_range_size = self._footer_spanning_range.size
remainder = footer_range_size % self._READ_BUFFER_SIZE
footer_range_size /= self._READ_BUFFER_SIZE
if remainder > 0:
footer_range_size += 1
footer_range_size *= self._READ_BUFFER_SIZE
return range_list.Range(footer_range_start_offset, footer_range_size)
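  # Worked example (hedged, not in the original source): with
  # _READ_BUFFER_SIZE = 512, a total data size of 4000 and a footer spanning
  # range with end_offset 22 and size 22, the start offset 4000 - 22 = 3978
  # rounds down to 3584 (7 * 512) and the size 22 rounds up to 512, so the
  # aligned footer range covers offsets 3584 up to 4096.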
def _GetHeaderRange(self):
"""Retrieves the read buffer aligned header range.
Returns:
A range (instance of Range).
"""
    # Round the header range start offset down to a multiple of the read
    # buffer size.
header_range_start_offset = self._header_spanning_range.start_offset
header_range_start_offset /= self._READ_BUFFER_SIZE
header_range_start_offset *= self._READ_BUFFER_SIZE
    # Round the header range size up to a multiple of the read buffer size.
header_range_size = self._header_spanning_range.size
remainder = header_range_size % self._READ_BUFFER_SIZE
header_range_size /= self._READ_BUFFER_SIZE
if remainder > 0:
header_range_size += 1
header_range_size *= self._READ_BUFFER_SIZE
return range_list.Range(header_range_start_offset, header_range_size)
def _ScanBufferScanState(
self, scan_tree_object, scan_state, data, data_size, total_data_offset,
total_data_size=None):
"""Scans a buffer using the scan tree.
    This function implements a Boyer-Moore-Horspool equivalent approach
in combination with the scan tree.
Args:
scan_tree_object: the scan tree (instance of ScanTree).
scan_state: the scan state (instance of ScanState).
data: a buffer containing raw data.
data_size: the size of the raw data in the buffer.
total_data_offset: the offset of the data relative to the start of
the total data scanned.
total_data_size: optional value to indicate the total data size.
The default is None.
"""
scan_done = False
scan_tree_node = scan_tree_object.root_node
while not scan_done:
data_offset = 0
scan_object = scan_tree_node.CompareByteValue(
data, data_offset, data_size, total_data_offset,
total_data_size=total_data_size)
if isinstance(scan_object, scan_tree.ScanTreeNode):
scan_tree_node = scan_object
else:
scan_done = True
if isinstance(scan_object, patterns.Pattern):
pattern_length = len(scan_object.signature.expression)
pattern_start_offset = scan_object.signature.offset
pattern_end_offset = pattern_start_offset + pattern_length
      if (scan_object.signature.expression ==
          data[pattern_start_offset:pattern_end_offset]):
scan_state.AddMatch(
total_data_offset + scan_object.signature.offset, scan_object)
logging.debug(
u'Signature match at data offset: 0x{0:08x}.'.format(data_offset))
# TODO: implement.
# def ScanBuffer(self, scan_state, data, data_size):
# """Scans a buffer.
# Args:
# scan_state: the scan state (instance of ScanState).
# data: a buffer containing raw data.
# data_size: the size of the raw data in the buffer.
# """
# # TODO: fix footer scanning logic.
# # need to know the file size here for the footers.
# # TODO: check for clashing ranges?
# header_range = self._GetHeaderRange()
# footer_range = self._GetFooterRange(scan_state.total_data_size)
# if self._scan_tree == self._header_scan_tree:
# if (scan_state.total_data_offset >= header_range.start_offset and
# scan_state.total_data_offset < header_range.end_offset):
# self._ScanBufferScanState(
# self._scan_tree, scan_state, data, data_size,
# scan_state.total_data_offset,
# total_data_size=scan_state.total_data_size)
# elif scan_state.total_data_offset > header_range.end_offset:
# # TODO: implement.
# pass
# if self._scan_tree == self._footer_scan_tree:
# if (scan_state.total_data_offset >= footer_range.start_offset and
# scan_state.total_data_offset < footer_range.end_offset):
# self._ScanBufferScanState(
# self._scan_tree, scan_state, data, data_size,
# scan_state.total_data_offset,
# total_data_size=scan_state.total_data_size)
def ScanFileObject(self, file_object):
"""Scans a file-like object.
Args:
file_object: a file-like object.
Returns:
      A list of scan results (instances of _ScanResult).
"""
# TODO: add support for fixed size block-based reads.
if hasattr(file_object, 'get_size'):
file_size = file_object.get_size()
else:
file_object.seek(0, os.SEEK_END)
file_size = file_object.tell()
file_offset = 0
scan_state = self.StartScan(total_data_size=file_size)
if self._header_scan_tree.root_node is not None:
header_range = self._GetHeaderRange()
# TODO: optimize the read by supporting fixed size block-based reads.
# if file_offset < header_range.start_offset:
# file_offset = header_range.start_offset
file_object.seek(file_offset, os.SEEK_SET)
# TODO: optimize the read by supporting fixed size block-based reads.
# data = file_object.read(header_range.size)
data = file_object.read(header_range.end_offset)
data_size = len(data)
if data_size > 0:
self._ScanBufferScanState(
self._scan_tree, scan_state, data, data_size, file_offset,
total_data_size=file_size)
file_offset += data_size
if self._footer_scan_tree.root_node is not None:
self.StopScan(scan_state)
self._scan_tree = self._footer_scan_tree
scan_state.Reset(self._scan_tree.root_node)
if self._footer_scan_tree.root_node is not None:
footer_range = self._GetFooterRange(file_size)
      # Note that the offsets in the footer scan tree start at 0. Make sure
# the data offset of the data being scanned is aligned with the offset
# in the scan tree.
if footer_range.start_offset < self._footer_spanning_range.end_offset:
data_offset = (
self._footer_spanning_range.end_offset - footer_range.start_offset)
else:
data_offset = 0
if file_offset < footer_range.start_offset:
file_offset = footer_range.start_offset
file_object.seek(file_offset, os.SEEK_SET)
data = file_object.read(self._READ_BUFFER_SIZE)
data_size = len(data)
if data_size > 0:
self._ScanBufferScanState(
self._scan_tree, scan_state, data[data_offset:],
data_size - data_offset, file_offset + data_offset,
total_data_size=file_size)
self.StopScan(scan_state)
return self.GetScanResults(scan_state)
def StartScan(self, total_data_size=None):
"""Starts a scan.
    The function sets up the scanning-related structures if necessary.
Args:
total_data_size: optional value to indicate the total data size.
The default is None.
Returns:
      A scan state (instance of ScanState).
Raises:
RuntimeError: when total data size is invalid.
"""
if total_data_size is None or total_data_size < 0:
raise RuntimeError(u'Invalid total data size.')
if self._header_scan_tree is None:
self._header_scan_tree = scan_tree.ScanTree(
self._specification_store, True,
offset_mode=scan_tree.ScanTree.OFFSET_MODE_POSITIVE)
if self._header_spanning_range is None:
spanning_range = self._header_scan_tree.range_list.GetSpanningRange()
self._header_spanning_range = spanning_range
if self._footer_scan_tree is None:
self._footer_scan_tree = scan_tree.ScanTree(
self._specification_store, True,
offset_mode=scan_tree.ScanTree.OFFSET_MODE_NEGATIVE)
if self._footer_spanning_range is None:
spanning_range = self._footer_scan_tree.range_list.GetSpanningRange()
self._footer_spanning_range = spanning_range
if self._header_scan_tree.root_node is not None:
self._scan_tree = self._header_scan_tree
elif self._footer_scan_tree.root_node is not None:
self._scan_tree = self._footer_scan_tree
else:
self._scan_tree = None
if self._scan_tree is not None:
root_node = self._scan_tree.root_node
else:
root_node = None
return ScanState(root_node, total_data_size=total_data_size)
def StopScan(self, scan_state):
"""Stops a scan.
Args:
scan_state: the scan state (instance of ScanState).
"""
self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
self._scan_tree = None
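The inner loop of _ScanBufferScanState above combines the scan tree with a Boyer-Moore-Horspool style skip table. A hedged, self-contained sketch of that skip idea, reduced to a single pattern (the scan tree version generalizes the table across all patterns):
# Hedged sketch, not part of the original module: Boyer-Moore-Horspool
# style skipping for a single byte string pattern.
def HorspoolFind(data, pattern):
  """Returns the offset of pattern in data or -1 (illustrative only)."""
  pattern_length = len(pattern)
  # Bytes occurring in the pattern (except its last position) allow a
  # shorter skip; all other bytes allow skipping the full pattern length.
  skip_table = dict(
      (pattern[index], pattern_length - index - 1)
      for index in range(pattern_length - 1))
  data_offset = 0
  while data_offset + pattern_length <= len(data):
    if data[data_offset:data_offset + pattern_length] == pattern:
      return data_offset
    last_byte = data[data_offset + pattern_length - 1]
    data_offset += skip_table.get(last_byte, pattern_length)
  return -1

assert HorspoolFind('\x00\x00regf\x00', 'regf') == 2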
+119
View File
@@ -0,0 +1,119 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the format scanner classes."""
import unittest
from plaso.classifier import scanner
from plaso.classifier import test_lib
class ScannerTest(unittest.TestCase):
"""Class to test the scanner."""
def testInitialize(self):
"""Function to test the initialize function."""
store = test_lib.CreateSpecificationStore()
# Signature for LNK
data1 = ('\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
'\x00\x00\x00\x46')
# Signature for REGF
data2 = 'regf'
# Random data
data3 = '\x01\xfa\xe0\xbe\x99\x8e\xdb\x70\xea\xcc\x6b\xae\x2f\xf5\xa2\xe4'
# Boundary scan test
data4a = ('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00PK')
data4b = ('\x07\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Z')
# Large buffer test
data5_size = 1024 * 1024
data5 = '\x00' * (data5_size - 4)
data5 += 'PK\x07\x08'
test_scanner = scanner.Scanner(store)
total_data_size = len(data1)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data1, len(data1))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
scan_state = test_scanner.StartScan(total_data_size=None)
test_scanner.ScanBuffer(scan_state, data1, len(data1))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
total_data_size = len(data2)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data2, len(data2))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
scan_state = test_scanner.StartScan(total_data_size=None)
test_scanner.ScanBuffer(scan_state, data2, len(data2))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
total_data_size = len(data3)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data3, len(data3))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 0)
scan_state = test_scanner.StartScan(total_data_size=None)
test_scanner.ScanBuffer(scan_state, data3, len(data3))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 0)
total_data_size = len(data4a) + len(data4b)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data4a, len(data4a))
test_scanner.ScanBuffer(scan_state, data4b, len(data4b))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
scan_state = test_scanner.StartScan(total_data_size=None)
test_scanner.ScanBuffer(scan_state, data4a, len(data4a))
test_scanner.ScanBuffer(scan_state, data4b, len(data4b))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
total_data_size = len(data5)
scan_state = test_scanner.StartScan(total_data_size=total_data_size)
test_scanner.ScanBuffer(scan_state, data5, len(data5))
test_scanner.StopScan(scan_state)
self.assertEqual(len(scan_state.GetMatches()), 1)
if __name__ == '__main__':
unittest.main()
+156
View File
@@ -0,0 +1,156 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The format specification classes."""
class Signature(object):
"""Class that defines a signature of a format specification.
The signature consists of a byte string expression, an optional
  offset relative to the start of the data, and a value to indicate
if the expression is bound to the offset.
"""
def __init__(self, expression, offset=None, is_bound=False):
"""Initializes the signature.
Args:
      expression: string containing the expression of the signature.
                  The expression consists of a byte string; at the moment
                  regular expressions (regexp) are not supported.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative to the start of the data; a negative offset
              is relative to the end of the data.
is_bound: boolean value to indicate the signature must be bound to
the offset or False by default.
"""
self.expression = expression
self.offset = offset
self.is_bound = is_bound
class Specification(object):
"""Class that contains a format specification."""
def __init__(self, identifier):
"""Initializes the specification.
Args:
identifier: string containing a unique name for the format.
"""
self.identifier = identifier
self.mime_types = []
self.signatures = []
self.universal_type_identifiers = []
def AddMimeType(self, mime_type):
"""Adds a MIME type."""
self.mime_types.append(mime_type)
def AddNewSignature(self, expression, offset=None, is_bound=False):
"""Adds a signature.
Args:
expression: string containing the expression of the signature.
      offset: the offset of the signature or None by default. None is used
              to indicate the signature has no offset. A positive offset
              is relative to the start of the data; a negative offset
              is relative to the end of the data.
is_bound: boolean value to indicate the signature must be bound to
the offset or False by default.
"""
self.signatures.append(
Signature(expression, offset=offset, is_bound=is_bound))
  def AddUniversalTypeIdentifier(self, universal_type_identifier):
    """Adds a Universal Type Identifier (UTI)."""
    self.universal_type_identifiers.append(universal_type_identifier)
class SpecificationStore(object):
"""Class that servers as a store for specifications."""
def __init__(self):
"""Initializes the specification store."""
self._format_specifications = {}
@property
def specifications(self):
"""A specifications iterator object."""
return self._format_specifications.itervalues()
def AddNewSpecification(self, identifier):
"""Adds a new specification.
Args:
identifier: a string containing the format identifier,
which should be unique for the store.
Returns:
      an instance of Specification.
Raises:
ValueError: if the store already contains a specification with
the same identifier.
"""
if identifier in self._format_specifications:
raise ValueError("specification {0:s} is already defined in "
"store.".format(identifier))
self._format_specifications[identifier] = Specification(identifier)
return self._format_specifications[identifier]
def AddSpecification(self, specification):
"""Adds a specification.
Args:
specification: the specification (instance of Specification).
Raises:
KeyError: if the store already contains a specification with
the same identifier.
"""
if specification.identifier in self._format_specifications:
raise KeyError(
u'Specification {0:s} is already defined in store.'.format(
specification.identifier))
self._format_specifications[specification.identifier] = specification
def ReadFromFileObject(self, unused_file_object):
"""Reads the specification store from a file-like object.
Args:
unused_file_object: A file-like object.
Raises:
RuntimeError: because functionality is not implemented yet.
"""
# TODO: implement this function.
raise RuntimeError(u'Function not implemented.')
def ReadFromFile(self, filename):
"""Reads the specification store from a file.
Args:
filename: The name of the file.
"""
    with open(filename, 'r') as file_object:
      self.ReadFromFileObject(file_object)
+46
View File
@@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the format specification classes."""
import unittest
from plaso.classifier import specification
class SpecificationStoreTest(unittest.TestCase):
"""Class to test the specification store."""
def testAddSpecification(self):
"""Function to test the add specification function."""
store = specification.SpecificationStore()
format_regf = specification.Specification('REGF')
format_regf.AddNewSignature('regf', offset=0)
format_esedb = specification.Specification('ESEDB')
format_esedb.AddNewSignature('\xef\xcd\xab\x89', offset=4)
store.AddSpecification(format_regf)
store.AddSpecification(format_esedb)
with self.assertRaises(KeyError):
store.AddSpecification(format_regf)
if __name__ == '__main__':
unittest.main()
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared test cases."""
from plaso.classifier import specification
def CreateSpecificationStore():
"""Creates a format specification store for testing purposes.
Returns:
A format specification store (instance of SpecificationStore).
"""
store = specification.SpecificationStore()
test_specification = store.AddNewSpecification('7zip')
test_specification.AddMimeType('application/x-7z-compressed')
test_specification.AddUniversalTypeIdentifier('org.7-zip.7-zip-archive')
test_specification.AddNewSignature('7z\xbc\xaf\x27\x1c', offset=0)
test_specification = store.AddNewSpecification('esedb')
test_specification.AddNewSignature(
'\xef\xcd\xab\x89', offset=4, is_bound=True)
test_specification = store.AddNewSpecification('evt')
test_specification.AddNewSignature(
'\x30\x00\x00\x00LfLe\x01\x00\x00\x00\x01\x00\x00\x00', offset=0,
is_bound=True)
test_specification = store.AddNewSpecification('evtx')
test_specification.AddNewSignature('ElfFile\x00', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('ewf')
test_specification.AddNewSignature(
'EVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)
  test_specification = store.AddNewSpecification('ewf_logical')
test_specification.AddNewSignature(
'LVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('lnk')
test_specification.AddNewSignature(
'\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
'\x00\x00\x00\x46', offset=0)
test_specification = store.AddNewSpecification('msiecf_index_dat')
test_specification.AddNewSignature(
'Client UrlCache MMF Ver ', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('nk2')
test_specification.AddNewSignature(
'\x0d\xf0\xad\xba\xa0\x00\x00\x00\x01\x00\x00\x00', offset=0,
is_bound=True)
test_specification = store.AddNewSpecification('olecf')
test_specification.AddNewSignature(
'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1', offset=0, is_bound=True)
test_specification.AddNewSignature(
'\x0e\x11\xfc\x0d\xd0\xcf\x11\x0e', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('pff')
test_specification.AddNewSignature('!BDN', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('qcow')
test_specification.AddNewSignature('QFI\xfb', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('rar')
test_specification.AddMimeType('application/x-rar-compressed')
test_specification.AddUniversalTypeIdentifier('com.rarlab.rar-archive')
test_specification.AddNewSignature(
'Rar!\x1a\x07\x00', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('regf')
test_specification.AddNewSignature('regf', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('thumbache_db_cache')
test_specification.AddNewSignature('CMMM', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('thumbache_db_index')
test_specification.AddNewSignature('IMMM', offset=0, is_bound=True)
test_specification = store.AddNewSpecification('zip')
test_specification.AddMimeType('application/zip')
test_specification.AddUniversalTypeIdentifier('com.pkware.zip-archive')
# WinZip 8 signature.
test_specification.AddNewSignature('PK00', offset=0, is_bound=True)
test_specification.AddNewSignature('PK\x01\x02')
test_specification.AddNewSignature('PK\x03\x04', offset=0)
test_specification.AddNewSignature('PK\x05\x05')
# Will be at offset 0 when the archive is empty.
test_specification.AddNewSignature('PK\x05\x06', offset=-22, is_bound=True)
test_specification.AddNewSignature('PK\x06\x06')
test_specification.AddNewSignature('PK\x06\x07')
test_specification.AddNewSignature('PK\x06\x08')
# Will be at offset 0 when this is spanned archive.
test_specification.AddNewSignature('PK\x07\x08')
return store
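The -22 offset on the bound PK\x05\x06 signature above comes from the fixed 22-byte size of the ZIP end-of-central-directory record; in an empty archive that record is the entire file. A hedged check, not part of the original module, using only the standard library:
# Hedged check: an empty ZIP archive is exactly the 22-byte
# end-of-central-directory record, which starts with PK\x05\x06.
import io
import zipfile

buffer_object = io.BytesIO()
zipfile.ZipFile(buffer_object, 'w').close()
data = buffer_object.getvalue()
assert len(data) == 22
assert data[0:4] == b'PK\x05\x06'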
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+202
View File
@@ -0,0 +1,202 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The file format classifier."""
# TODO: rewrite most of the classifier in C and integrate with the code in:
# plaso/classifier
import gzip
import logging
import os
import tarfile
import zipfile
import zlib
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.lib import errors
class Classifier(object):
"""Class that defines the file format classifier."""
_MAGIC_VALUES = {
'ZIP': {'length': 4, 'offset': 0, 'values': ['P', 'K', '\x03', '\x04']},
'TAR': {'length': 5, 'offset': 257, 'values': ['u', 's', 't', 'a', 'r']},
'GZ': {'length': 2, 'offset': 0, 'values': ['\x1f', '\x8b']},
}
  # TODO: Remove this logic when the classifier is ready.
  # This is only used temporarily until files can be classified.
magic_max_length = 0
# Defines the maximum depth into a file (for SmartOpenFiles).
MAX_FILE_DEPTH = 3
@classmethod
def _SmartOpenFile(cls, file_entry):
"""Return a generator for all pathspec protobufs extracted from a file.
If the file is compressed then extract all members and include
them into the processing queue.
Args:
file_entry: The file entry object.
Yields:
A path specification (instance of dfvfs.PathSpec) of embedded file
entries.
"""
file_object = file_entry.GetFileObject()
    # TODO: Remove when the classifier gets deployed. Then we call the
    # classifier here and use that for the definition (and then we forward
    # the classifier definition in the path specification).
file_object.seek(0, os.SEEK_SET)
if not cls.magic_max_length:
for magic_value in cls._MAGIC_VALUES.values():
cls.magic_max_length = max(
cls.magic_max_length,
magic_value['length'] + magic_value['offset'])
header = file_object.read(cls.magic_max_length)
file_classification = ''
    # Go over each and every magic value defined and compare
    # each read byte (according to the original offset and the current one).
    # If all match, then we have a particular file format and we can
    # move on.
for m_value, m_dict in cls._MAGIC_VALUES.items():
length = m_dict['length'] + m_dict['offset']
if len(header) < length:
continue
offset = m_dict['offset']
magic = m_dict['values']
if header[offset:offset + len(magic)] == ''.join(magic):
file_classification = m_value
break
# TODO: refactor the file type specific code into sub functions.
if file_classification == 'ZIP':
try:
file_object.seek(0, os.SEEK_SET)
zip_file = zipfile.ZipFile(file_object, 'r')
        # TODO: Make this a more "sane" check, and perhaps not entirely
        # skip the file if it has this particular ending, but for now this
        # both slows the tool down considerably and makes it more unstable.
_, _, filename_extension = file_entry.name.rpartition(u'.')
        if filename_extension in [u'jar', u'sym', u'xpi']:
file_object.close()
logging.debug(
u'Unsupported ZIP sub type: {0:s} detected in file: {1:s}'.format(
filename_extension, file_entry.path_spec.comparable))
return
for info in zip_file.infolist():
if info.file_size > 0:
logging.debug(
u'Including: {0:s} from ZIP into process queue.'.format(
info.filename))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_ZIP, location=info.filename,
parent=file_entry.path_spec)
except zipfile.BadZipfile:
pass
elif file_classification == 'GZ':
try:
type_indicator = file_entry.path_spec.type_indicator
if type_indicator == definitions.TYPE_INDICATOR_GZIP:
raise errors.SameFileType
file_object.seek(0, os.SEEK_SET)
gzip_file = gzip.GzipFile(fileobj=file_object, mode='rb')
_ = gzip_file.read(4)
gzip_file.close()
logging.debug((
u'Including: {0:s} as GZIP compressed stream into process '
u'queue.').format(file_entry.name))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_GZIP, parent=file_entry.path_spec)
except (IOError, zlib.error, errors.SameFileType):
pass
# TODO: Add BZ2 support.
elif file_classification == 'TAR':
try:
file_object.seek(0, os.SEEK_SET)
tar_file = tarfile.open(fileobj=file_object, mode='r')
for name_info in tar_file.getmembers():
if not name_info.isfile():
continue
name = name_info.path
logging.debug(
u'Including: {0:s} from TAR into process queue.'.format(name))
yield path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_TAR, location=name,
parent=file_entry.path_spec)
except tarfile.ReadError:
pass
file_object.close()
@classmethod
def SmartOpenFiles(cls, file_entry, depth=0):
"""Generate a list of all available PathSpecs extracted from a file.
Args:
file_entry: A file entry object.
      depth: Incrementing number that defines the current depth into
             a file (a file inside a ZIP file is depth 1, a file inside
             a tar.gz would be depth 2).
Yields:
A file entry object (instance of dfvfs.FileEntry).
"""
if depth >= cls.MAX_FILE_DEPTH:
return
for path_spec in cls._SmartOpenFile(file_entry):
sub_file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
if sub_file_entry is None:
logging.debug(
u'Unable to open file: {0:s}'.format(path_spec.comparable))
continue
yield sub_file_entry
depth += 1
for sub_file_entry in cls.SmartOpenFiles(sub_file_entry, depth=depth):
yield sub_file_entry
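The magic value comparison in _SmartOpenFile above reduces to a header prefix check at a fixed offset. A hedged standalone sketch of just that step, not part of the original module, reusing the same ZIP, TAR and GZ values:
# Hedged sketch: the magic value comparison used by
# Classifier._SmartOpenFile, in isolation.
_MAGIC_VALUES = {
    'ZIP': {'length': 4, 'offset': 0, 'values': ['P', 'K', '\x03', '\x04']},
    'TAR': {'length': 5, 'offset': 257, 'values': ['u', 's', 't', 'a', 'r']},
    'GZ': {'length': 2, 'offset': 0, 'values': ['\x1f', '\x8b']},
}

def ClassifyHeader(header):
  """Returns the file classification of the header or an empty string."""
  for classification, magic_dict in _MAGIC_VALUES.items():
    offset = magic_dict['offset']
    magic = ''.join(magic_dict['values'])
    if len(header) < offset + len(magic):
      continue
    if header[offset:offset + len(magic)] == magic:
      return classification
  return ''

assert ClassifyHeader('PK\x03\x04rest of file') == 'ZIP'
assert ClassifyHeader('\x1f\x8bcompressed data') == 'GZ'
assert ClassifyHeader('no known magic here') == ''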
+421
View File
@@ -0,0 +1,421 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generic collector that supports both file system and image files."""
import hashlib
import logging
import os
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.lib import errors as dfvfs_errors
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.engine import queue
from plaso.lib import errors
class Collector(queue.ItemQueueProducer):
"""Class that implements a collector object."""
def __init__(
self, process_queue, source_path, source_path_spec,
resolver_context=None):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
source_path: Path of the source file or directory.
      source_path_spec: The source path specification (instance of
                        dfvfs.PathSpec) as determined by the file system
                        scanner.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None.
"""
super(Collector, self).__init__(process_queue)
self._filter_find_specs = None
self._fs_collector = FileSystemCollector(process_queue)
self._resolver_context = resolver_context
# TODO: remove the need to pass source_path
self._source_path = os.path.abspath(source_path)
self._source_path_spec = source_path_spec
self._vss_stores = None
def __enter__(self):
"""Enters a with statement."""
return self
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Exits a with statement."""
return
def _ProcessImage(self, volume_path_spec, find_specs=None):
"""Processes a volume within a storage media image.
Args:
volume_path_spec: The path specification of the volume containing
the file system.
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
if find_specs:
logging.debug(u'Collecting from image file: {0:s} with filter'.format(
self._source_path))
else:
logging.debug(u'Collecting from image file: {0:s}'.format(
self._source_path))
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
try:
file_system = path_spec_resolver.Resolver.OpenFileSystem(
path_spec, resolver_context=self._resolver_context)
except IOError as exception:
logging.error(
u'Unable to open file system with error: {0:s}'.format(exception))
return
try:
self._fs_collector.Collect(
file_system, path_spec, find_specs=find_specs)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
if find_specs:
logging.debug(u'Collection from image with filter FAILED.')
else:
logging.debug(u'Collection from image FAILED.')
return
if self._abort:
return
if self._vss_stores:
self._ProcessVSS(volume_path_spec, find_specs=find_specs)
if find_specs:
logging.debug(u'Collection from image with filter COMPLETED.')
else:
logging.debug(u'Collection from image COMPLETED.')
def _ProcessVSS(self, volume_path_spec, find_specs=None):
"""Processes a VSS volume within a storage media image.
Args:
volume_path_spec: The path specification of the volume containing
the file system.
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
logging.info(u'Processing VSS.')
vss_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location=u'/',
parent=volume_path_spec)
vss_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
vss_path_spec, resolver_context=self._resolver_context)
number_of_vss = vss_file_entry.number_of_sub_file_entries
    # In plaso, 1 represents the first store index; in dfvfs and pyvshadow,
    # 0 represents the first store index, so 1 is subtracted.
vss_store_range = [store_nr - 1 for store_nr in self._vss_stores]
for store_index in vss_store_range:
if self._abort:
return
if find_specs:
logging.info((
u'Collecting from VSS volume: {0:d} out of: {1:d} '
u'with filter').format(store_index + 1, number_of_vss))
else:
logging.info(u'Collecting from VSS volume: {0:d} out of: {1:d}'.format(
store_index + 1, number_of_vss))
vss_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_VSHADOW, store_index=store_index,
parent=volume_path_spec)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=vss_path_spec)
file_system = path_spec_resolver.Resolver.OpenFileSystem(
path_spec, resolver_context=self._resolver_context)
try:
self._fs_collector.Collect(
file_system, path_spec, find_specs=find_specs)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
if find_specs:
logging.debug(
u'Collection from VSS store: {0:d} with filter FAILED.'.format(
store_index + 1))
else:
logging.debug(u'Collection from VSS store: {0:d} FAILED.'.format(
store_index + 1))
return
if find_specs:
logging.debug(
u'Collection from VSS store: {0:d} with filter COMPLETED.'.format(
store_index + 1))
else:
logging.debug(u'Collection from VSS store: {0:d} COMPLETED.'.format(
store_index + 1))
def Collect(self):
"""Collects files from the source."""
source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
self._source_path_spec, resolver_context=self._resolver_context)
if not source_file_entry:
logging.warning(u'No files to collect.')
self.SignalEndOfInput()
return
if (not source_file_entry.IsDirectory() and
not source_file_entry.IsFile() and
not source_file_entry.IsDevice()):
raise errors.CollectorError(
u'Source path: {0:s} not a device, file or directory.'.format(
self._source_path))
type_indicator = self._source_path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
if source_file_entry.IsFile():
self.ProduceItem(self._source_path_spec)
else:
file_system = path_spec_resolver.Resolver.OpenFileSystem(
self._source_path_spec, resolver_context=self._resolver_context)
try:
self._fs_collector.Collect(
file_system, self._source_path_spec,
find_specs=self._filter_find_specs)
except (dfvfs_errors.AccessError,
dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
else:
self._ProcessImage(
self._source_path_spec.parent, find_specs=self._filter_find_specs)
self.SignalEndOfInput()
def SetCollectDirectoryMetadata(self, collect_directory_metadata):
"""Sets the collect directory metadata flag.
Args:
collect_directory_metadata: Boolean value to indicate to collect
directory metadata.
"""
self._fs_collector.SetCollectDirectoryMetadata(collect_directory_metadata)
def SetFilter(self, filter_find_specs):
"""Sets the collection filter find specifications.
Args:
filter_find_specs: List of filter find specifications (instances of
dfvfs.FindSpec).
"""
self._filter_find_specs = filter_find_specs
def SetVssInformation(self, vss_stores):
"""Sets the Volume Shadow Snapshots (VSS) information.
This function will enable VSS collection.
Args:
vss_stores: The range of VSS stores to include in the collection,
where 1 represents the first store.
"""
self._vss_stores = vss_stores
def SignalAbort(self):
"""Signals the producer to abort."""
super(Collector, self).SignalAbort()
self._fs_collector.SignalAbort()
class FileSystemCollector(queue.ItemQueueProducer):
"""Class that implements a file system collector object."""
def __init__(self, process_queue):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
"""
super(FileSystemCollector, self).__init__(process_queue)
self._collect_directory_metadata = True
self._duplicate_file_check = False
self._hashlist = {}
self.number_of_file_entries = 0
def __enter__(self):
"""Enters a with statement."""
return self
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Exits a with statement."""
return
def _CalculateNTFSTimeHash(self, file_entry):
"""Return a hash value calculated from a NTFS file's metadata.
Args:
file_entry: The file entry (instance of TSKFileEntry).
Returns:
A hash value (string) that can be used to determine if a file's timestamp
value has changed.
"""
stat_object = file_entry.GetStat()
ret_hash = hashlib.md5()
ret_hash.update('atime:{0:d}.{1:d}'.format(
getattr(stat_object, 'atime', 0),
getattr(stat_object, 'atime_nano', 0)))
ret_hash.update('crtime:{0:d}.{1:d}'.format(
getattr(stat_object, 'crtime', 0),
getattr(stat_object, 'crtime_nano', 0)))
ret_hash.update('mtime:{0:d}.{1:d}'.format(
getattr(stat_object, 'mtime', 0),
getattr(stat_object, 'mtime_nano', 0)))
ret_hash.update('ctime:{0:d}.{1:d}'.format(
getattr(stat_object, 'ctime', 0),
getattr(stat_object, 'ctime_nano', 0)))
return ret_hash.hexdigest()
def _ProcessDirectory(self, file_entry):
"""Processes a directory and extract its metadata if necessary."""
# Need to do a breadth-first search otherwise we'll hit the Python
# maximum recursion depth.
sub_directories = []
for sub_file_entry in file_entry.sub_file_entries:
if self._abort:
return
try:
if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink():
continue
except dfvfs_errors.BackEndError as exception:
logging.warning(
u'Unable to process file: {0:s} with error: {1:s}'.format(
sub_file_entry.path_spec.comparable.replace(
u'\n', u';'), exception))
continue
# For TSK-based file entries only, ignore the virtual /$OrphanFiles
# directory.
if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK:
if file_entry.IsRoot() and sub_file_entry.name == u'$OrphanFiles':
continue
if sub_file_entry.IsDirectory():
# This check is here to improve performance by not producing
# path specifications that don't get processed.
if self._collect_directory_metadata:
self.ProduceItem(sub_file_entry.path_spec)
self.number_of_file_entries += 1
sub_directories.append(sub_file_entry)
elif sub_file_entry.IsFile():
# When processing a VSS, calculate a hash value based on the available
# timestamps, compare it against previously calculated hash values, and
# only add the file to the queue if the hash does not match.
if self._duplicate_file_check:
hash_value = self._CalculateNTFSTimeHash(sub_file_entry)
inode = getattr(sub_file_entry.path_spec, 'inode', 0)
if inode in self._hashlist:
if hash_value in self._hashlist[inode]:
continue
self._hashlist.setdefault(inode, []).append(hash_value)
self.ProduceItem(sub_file_entry.path_spec)
self.number_of_file_entries += 1
for sub_file_entry in sub_directories:
if self._abort:
return
try:
self._ProcessDirectory(sub_file_entry)
except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
logging.warning(u'{0:s}'.format(exception))
def Collect(self, file_system, path_spec, find_specs=None):
"""Collects files from the file system.
Args:
file_system: The file system (instance of dfvfs.FileSystem).
path_spec: The path specification (instance of dfvfs.PathSpec).
find_specs: Optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
if find_specs:
searcher = file_system_searcher.FileSystemSearcher(file_system, path_spec)
for path_spec in searcher.Find(find_specs=find_specs):
if self._abort:
return
self.ProduceItem(path_spec)
self.number_of_file_entries += 1
else:
file_entry = file_system.GetFileEntryByPathSpec(path_spec)
self._ProcessDirectory(file_entry)
def SetCollectDirectoryMetadata(self, collect_directory_metadata):
"""Sets the collect directory metadata flag.
Args:
collect_directory_metadata: Boolean value to indicate whether directory
metadata should be collected.
"""
self._collect_directory_metadata = collect_directory_metadata
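# Usage sketch (illustrative, not part of the original module), assuming a
# dfvfs OS path specification and the single process queue from
# plaso.engine.single_process:
#
#   from dfvfs.lib import definitions as dfvfs_definitions
#   from dfvfs.path import factory as path_spec_factory
#   from plaso.engine import single_process
#
#   path_spec = path_spec_factory.Factory.NewPathSpec(
#       dfvfs_definitions.TYPE_INDICATOR_OS, location=u'/tmp')
#   process_queue = single_process.SingleProcessQueue()
#   collector_object = Collector(process_queue, u'/tmp', path_spec)
#   collector_object.Collect()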
+354
View File
@@ -0,0 +1,354 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The unit tests for the generic collector object."""
import logging
import os
import shutil
import tempfile
import unittest
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.engine import collector
from plaso.engine import queue
from plaso.engine import single_process
from plaso.engine import utils as engine_utils
class TempDirectory(object):
"""A self cleaning temporary directory."""
def __init__(self):
"""Initializes the temporary directory."""
super(TempDirectory, self).__init__()
self.name = u''
def __enter__(self):
"""Make this work with the 'with' statement."""
self.name = tempfile.mkdtemp()
return self.name
def __exit__(self, unused_type, unused_value, unused_traceback):
"""Make this work with the 'with' statement."""
shutil.rmtree(self.name, True)
class TestCollectorQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements a test collector queue consumer."""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(TestCollectorQueueConsumer, self).__init__(queue_object)
self.path_specs = []
def _ConsumeItem(self, path_spec):
"""Consumes an item callback for ConsumeItems.
Args:
path_spec: a path specification (instance of dfvfs.PathSpec).
"""
self.path_specs.append(path_spec)
@property
def number_of_path_specs(self):
"""The number of path specifications."""
return len(self.path_specs)
def GetFilePaths(self):
"""Retrieves a list of file paths from the path specifications."""
file_paths = []
for path_spec in self.path_specs:
location = getattr(path_spec, 'location', None)
if location is not None:
file_paths.append(location)
return file_paths
class CollectorTestCase(unittest.TestCase):
"""The collector test case."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data')
# Show full diff results, part of TestCase so does not follow our naming
# conventions.
maxDiff = None
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
A path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
class CollectorTest(CollectorTestCase):
"""Tests for the collector."""
def testFileSystemCollection(self):
"""Test collection on the file system."""
test_files = [
self._GetTestFilePath([u'syslog.tgz']),
self._GetTestFilePath([u'syslog.zip']),
self._GetTestFilePath([u'syslog.bz2']),
self._GetTestFilePath([u'wtmp.1'])]
with TempDirectory() as dirname:
for a_file in test_files:
shutil.copy(a_file, dirname)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, dirname, path_spec,
resolver_context=resolver_context)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)
def testFileSystemWithFilterCollection(self):
"""Test collection on the file system with a filter."""
dirname = u'.'
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
temp_file.write('/test_data/testdir/filter_.+.txt\n')
temp_file.write('/test_data/.+evtx\n')
temp_file.write('/AUTHORS\n')
temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, dirname, path_spec,
resolver_context=resolver_context)
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
test_collector.SetFilter(find_specs)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning((
u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
filter_name, exception))
# Two files matching test_data/testdir/filter_*.txt, one AUTHORS
# file and one test_data/System.evtx file; 4 path specifications in total.
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)
paths = test_collector_queue_consumer.GetFilePaths()
current_directory = os.getcwd()
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_1.txt')
self.assertTrue(expected_path in paths)
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_2.txt')
self.assertFalse(expected_path in paths)
expected_path = os.path.join(
current_directory, u'test_data', u'testdir', u'filter_3.txt')
self.assertTrue(expected_path in paths)
expected_path = os.path.join(
current_directory, u'AUTHORS')
self.assertTrue(expected_path in paths)
def testImageCollection(self):
"""Test collection on a storage media image file.
This image has two files:
+ logs/hidden.zip
+ logs/sys.tgz
The hidden.zip file contains one file, syslog, which is identical
to the one in sys.tgz.
The end results should therefore be:
+ logs/hidden.zip (unchanged)
+ logs/hidden.zip:syslog (the text file extracted out)
+ logs/sys.tgz (unchanged)
+ logs/sys.tgz (read as a GZIP file, so not compressed)
+ logs/sys.tgz:syslog.gz (A GZIP file from the TAR container)
+ logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)
This means that extraction yields 6 files in total. The collector itself,
however, only discovers the file system entries: the logs directory and
the two archive files, hence 3 path specifications; recursing into the
archives is left to the extraction workers.
"""
test_file = self._GetTestFilePath([u'syslog_image.dd'])
volume_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, test_file, path_spec,
resolver_context=resolver_context)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 3)
def testImageWithFilterCollection(self):
"""Test collection on a storage media image file with a filter."""
test_file = self._GetTestFilePath([u'ímynd.dd'])
volume_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_path_spec)
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
temp_file.write('/a_directory/.+zip\n')
temp_file.write('/a_directory/another.+\n')
temp_file.write('/passwords.txt\n')
test_collection_queue = single_process.SingleProcessQueue()
resolver_context = context.Context()
test_collector = collector.Collector(
test_collection_queue, test_file, path_spec,
resolver_context=resolver_context)
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
test_collector.SetFilter(find_specs)
test_collector.Collect()
test_collector_queue_consumer = TestCollectorQueueConsumer(
test_collection_queue)
test_collector_queue_consumer.ConsumeItems()
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning((
u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
filter_name, exception))
self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2)
paths = test_collector_queue_consumer.GetFilePaths()
# path_specs[0]
# type: TSK
# file_path: '/a_directory/another_file'
# container_path: 'test_data/ímynd.dd'
# image_offset: 0
self.assertEquals(paths[0], u'/a_directory/another_file')
# path_specs[1]
# type: TSK
# file_path: '/passwords.txt'
# container_path: 'test_data/ímynd.dd'
# image_offset: 0
self.assertEquals(paths[1], u'/passwords.txt')
class BuildFindSpecsFromFileTest(unittest.TestCase):
"""Tests for the BuildFindSpecsFromFile function."""
def testBuildFindSpecsFromFile(self):
"""Tests the BuildFindSpecsFromFile function."""
filter_name = ''
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
filter_name = temp_file.name
# 2 hits.
temp_file.write('/test_data/testdir/filter_.+.txt\n')
# A single hit.
temp_file.write('/test_data/.+evtx\n')
# A single hit.
temp_file.write('/AUTHORS\n')
temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
# This should not compile properly, missing file information.
temp_file.write('failing/\n')
# This should not fail during initial loading, but fail later on.
temp_file.write('bad re (no close on that parenthesis/file\n')
find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
try:
os.remove(filter_name)
except (OSError, IOError) as exception:
logging.warning(
u'Unable to remove temporary file: {0:s} with error: {1:s}'.format(
filter_name, exception))
self.assertEquals(len(find_specs), 4)
dirname = u'.'
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
searcher = file_system_searcher.FileSystemSearcher(
file_system, path_spec)
path_spec_generator = searcher.Find(find_specs=find_specs)
self.assertNotEquals(path_spec_generator, None)
path_specs = list(path_spec_generator)
# One evtx, one AUTHORS, two filter_*.txt files, total 4 files.
self.assertEquals(len(path_specs), 4)
with self.assertRaises(IOError):
_ = engine_utils.BuildFindSpecsFromFile('thisfiledoesnotexist')
if __name__ == '__main__':
unittest.main()
+319
View File
@@ -0,0 +1,319 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The processing engine."""
import abc
import logging
from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.resolver import resolver as path_spec_resolver
from plaso.artifacts import knowledge_base
from plaso.engine import collector
from plaso.engine import queue
from plaso.lib import errors
from plaso.preprocessors import interface as preprocess_interface
from plaso.preprocessors import manager as preprocess_manager
class BaseEngine(object):
"""Class that defines the processing engine base."""
def __init__(self, collection_queue, storage_queue, parse_error_queue):
"""Initialize the engine object.
Args:
collection_queue: the collection queue object (instance of Queue).
storage_queue: the storage queue object (instance of Queue).
parse_error_queue: the parser error queue object (instance of Queue).
"""
self._collection_queue = collection_queue
self._enable_debug_output = False
self._enable_profiling = False
self._event_queue_producer = queue.ItemQueueProducer(storage_queue)
self._filter_object = None
self._mount_path = None
self._open_files = False
self._parse_error_queue = parse_error_queue
self._parse_error_queue_producer = queue.ItemQueueProducer(
parse_error_queue)
self._profiling_sample_rate = 1000
self._source = None
self._source_path_spec = None
self._source_file_entry = None
self._text_prepend = None
self.knowledge_base = knowledge_base.KnowledgeBase()
self.storage_queue = storage_queue
def CreateCollector(
self, include_directory_stat, vss_stores=None, filter_find_specs=None,
resolver_context=None):
"""Creates a collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
include_directory_stat: Boolean value to indicate whether directory
stat information should be collected.
vss_stores: Optional list of VSS stores to include in the collection,
where 1 represents the first store. Set to None if no
VSS stores should be processed. The default is None.
filter_find_specs: Optional list of filter find specifications (instances
of dfvfs.FindSpec). The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
A collector object (instance of Collector).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
collector_object = collector.Collector(
self._collection_queue, self._source, self._source_path_spec,
resolver_context=resolver_context)
collector_object.SetCollectDirectoryMetadata(include_directory_stat)
if vss_stores:
collector_object.SetVssInformation(vss_stores)
if filter_find_specs:
collector_object.SetFilter(filter_find_specs)
return collector_object
@abc.abstractmethod
def CreateExtractionWorker(self, worker_number):
"""Creates an extraction worker object.
Args:
worker_number: A number that identifies the worker.
Returns:
An extraction worker (instance of worker.ExtractionWorker).
"""
def GetSourceFileSystemSearcher(self, resolver_context=None):
"""Retrieves the file system searcher of the source.
Args:
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
The file system searcher object (instance of dfvfs.FileSystemSearcher).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
file_system = path_spec_resolver.Resolver.OpenFileSystem(
self._source_path_spec, resolver_context=resolver_context)
type_indicator = self._source_path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
mount_point = self._source_path_spec
else:
mount_point = self._source_path_spec.parent
return file_system_searcher.FileSystemSearcher(file_system, mount_point)
def PreprocessSource(self, platform, resolver_context=None):
"""Preprocesses the source and fills the preprocessing object.
Args:
platform: string that indicates the platform (operating system).
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
"""
searcher = self.GetSourceFileSystemSearcher(
resolver_context=resolver_context)
if not platform:
platform = preprocess_interface.GuessOS(searcher)
self.knowledge_base.platform = platform
preprocess_manager.PreprocessPluginsManager.RunPlugins(
platform, searcher, self.knowledge_base)
def SetEnableDebugOutput(self, enable_debug_output):
"""Enables or disables debug output.
Args:
enable_debug_output: boolean value to indicate if the debug output
should be enabled.
"""
self._enable_debug_output = enable_debug_output
def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
"""Enables or disables profiling.
Args:
enable_profiling: boolean value to indicate if profiling should be
enabled.
profiling_sample_rate: optional integer indicating the profiling sample
rate. The value contains the number of files
processed. The default value is 1000.
"""
self._enable_profiling = enable_profiling
self._profiling_sample_rate = profiling_sample_rate
def SetFilterObject(self, filter_object):
"""Sets the filter object.
Args:
filter_object: the filter object (instance of objectfilter.Filter).
"""
self._filter_object = filter_object
def SetMountPath(self, mount_path):
"""Sets the mount path.
Args:
mount_path: string containing the mount path.
"""
self._mount_path = mount_path
# TODO: rename this mode.
def SetOpenFiles(self, open_files):
"""Sets the open files mode.
Args:
open_files: boolean value to indicate if the worker should scan for
file entries inside files.
"""
self._open_files = open_files
def SetSource(self, source_path_spec, resolver_context=None):
"""Sets the source.
Args:
source_path_spec: The source path specification (instance of
dfvfs.PathSpec) as determined by the file system
scanner.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Raises:
BadConfigOption: if the source cannot be set.
CollectorError: if the source path is not a device, file or directory.
"""
path_spec = source_path_spec
while path_spec.parent:
path_spec = path_spec.parent
# Note that source should be used for output purposes only.
self._source = getattr(path_spec, 'location', u'')
self._source_path_spec = source_path_spec
self._source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
self._source_path_spec, resolver_context=resolver_context)
if not self._source_file_entry:
raise errors.BadConfigOption(
u'No such device, file or directory: {0:s}.'.format(self._source))
if (not self._source_file_entry.IsDirectory() and
not self._source_file_entry.IsFile() and
not self._source_file_entry.IsDevice()):
raise errors.CollectorError(
u'Source path: {0:s} not a device, file or directory.'.format(
self._source))
if self._source_path_spec.type_indicator in [
dfvfs_definitions.TYPE_INDICATOR_OS,
dfvfs_definitions.TYPE_INDICATOR_FAKE]:
if self._source_file_entry.IsFile():
logging.debug(u'Starting a collection on a single file.')
# No need for multiple workers when parsing a single file.
elif not self._source_file_entry.IsDirectory():
raise errors.BadConfigOption(
u'Source: {0:s} has to be a file or directory.'.format(
self._source))
# TODO: remove this functionality.
def SetTextPrepend(self, text_prepend):
"""Sets the text prepend.
Args:
text_prepend: string that contains the text to prepend to every
event object.
"""
self._text_prepend = text_prepend
def SignalAbort(self):
"""Signals the engine to abort."""
logging.warning(u'Signalled abort.')
self._event_queue_producer.SignalEndOfInput()
self._parse_error_queue_producer.SignalEndOfInput()
def SignalEndOfInputStorageQueue(self):
"""Signals the storage queue no input remains."""
self._event_queue_producer.SignalEndOfInput()
self._parse_error_queue_producer.SignalEndOfInput()
def SourceIsDirectory(self):
"""Determines if the source is a directory.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_file_entry:
raise RuntimeError(u'Missing source.')
return (not self.SourceIsStorageMediaImage() and
self._source_file_entry.IsDirectory())
def SourceIsFile(self):
"""Determines if the source is a file.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_file_entry:
raise RuntimeError(u'Missing source.')
return (not self.SourceIsStorageMediaImage() and
self._source_file_entry.IsFile())
def SourceIsStorageMediaImage(self):
"""Determines if the source is storage media image file or device.
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
return self._source_path_spec.type_indicator not in [
dfvfs_definitions.TYPE_INDICATOR_OS,
dfvfs_definitions.TYPE_INDICATOR_FAKE]
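# Usage sketch (illustrative, not part of the original module): a concrete
# subclass, such as the single process engine, is typically driven along
# these lines, where engine_object is a placeholder for such an instance:
#
#   from dfvfs.lib import definitions as dfvfs_definitions
#   from dfvfs.path import factory as path_spec_factory
#
#   source_path_spec = path_spec_factory.Factory.NewPathSpec(
#       dfvfs_definitions.TYPE_INDICATOR_OS, location=u'/tmp')
#   engine_object.SetSource(source_path_spec)
#   if engine_object.SourceIsDirectory():
#     collector_object = engine_object.CreateCollector(True)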
+204
View File
@@ -0,0 +1,204 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Queue management implementation for Plaso.
This file contains an implementation of a queue used by plaso for
queue management, together with the corresponding queue producer and
consumer interfaces.
The queue has been abstracted in order to support different queueing
mechanisms, such as multi processing, and to provide scalability.
"""
import abc
from plaso.lib import errors
class QueueEndOfInput(object):
"""Class that implements a queue end of input."""
class Queue(object):
"""Class that implements the queue interface."""
@abc.abstractmethod
def __len__(self):
"""Returns the estimated current number of items in the queue."""
@abc.abstractmethod
def IsEmpty(self):
"""Determines if the queue is empty."""
@abc.abstractmethod
def PushItem(self, item):
"""Pushes an item onto the queue."""
@abc.abstractmethod
def PopItem(self):
"""Pops an item off the queue."""
def SignalEndOfInput(self):
"""Signals the queue no input remains."""
self.PushItem(QueueEndOfInput())
class QueueConsumer(object):
"""Class that implements the queue consumer interface.
The consumer subscribes to updates on the queue.
"""
def __init__(self, queue_object):
"""Initializes the queue consumer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(QueueConsumer, self).__init__()
self._abort = False
self._queue = queue_object
def SignalAbort(self):
"""Signals the consumer to abort."""
self._abort = True
class QueueProducer(object):
"""Class that implements the queue producer interface.
The producer generates updates on the queue.
"""
def __init__(self, queue_object):
"""Initializes the queue producer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(QueueProducer, self).__init__()
self._abort = False
self._queue = queue_object
def SignalAbort(self):
"""Signals the producer to abort."""
self._abort = True
def SignalEndOfInput(self):
"""Signals the queue no input remains."""
self._queue.SignalEndOfInput()
class EventObjectQueueConsumer(QueueConsumer):
"""Class that implements the event object queue consumer.
The consumer subscribes to updates on the queue.
"""
@abc.abstractmethod
def _ConsumeEventObject(self, event_object, **kwargs):
"""Consumes an event object callback for ConsumeEventObjects."""
def ConsumeEventObjects(self, **kwargs):
"""Consumes the event object that are pushed on the queue.
This function will issue a callback to _ConsumeEventObject for every
event object (instance of EventObject) consumed from the queue.
Args:
kwargs: keyword arguments to pass to the _ConsumeEventObject callback.
"""
while not self._abort:
try:
item = self._queue.PopItem()
except errors.QueueEmpty:
break
if isinstance(item, QueueEndOfInput):
# Push the item back onto the queue to make sure all
# queue consumers are stopped.
self._queue.PushItem(item)
break
self._ConsumeEventObject(item, **kwargs)
self._abort = False
class ItemQueueConsumer(QueueConsumer):
"""Class that implements an item queue consumer.
The consumer subscribes to updates on the queue.
"""
@abc.abstractmethod
def _ConsumeItem(self, item):
"""Consumes an item callback for ConsumeItems.
Args:
item: the item object.
"""
def ConsumeItems(self):
"""Consumes the items that are pushed on the queue."""
while not self._abort:
try:
item = self._queue.PopItem()
except errors.QueueEmpty:
break
if isinstance(item, QueueEndOfInput):
# Push the item back onto the queue to make sure all
# queue consumers are stopped.
self._queue.PushItem(item)
break
self._ConsumeItem(item)
self._abort = False
class ItemQueueProducer(QueueProducer):
"""Class that implements an item queue producer.
The producer generates updates on the queue.
"""
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
return
def ProduceItem(self, item):
"""Produces an item onto the queue.
Args:
item: the item object.
"""
try:
self._queue.PushItem(item)
except errors.QueueFull:
self._FlushQueue()
def ProduceItems(self, items):
"""Produces items onto the queue.
Args:
items: a list or generator of item objects.
"""
for item in items:
self.ProduceItem(item)
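# Illustrative sketch (not part of the original module): a minimal consumer
# implementation and a matching producer; PrintingConsumer and queue_object
# are hypothetical names, where queue_object refers to a concrete Queue
# implementation such as plaso.engine.single_process.SingleProcessQueue:
#
#   class PrintingConsumer(ItemQueueConsumer):
#     """Item queue consumer that prints every consumed item."""
#
#     def _ConsumeItem(self, item):
#       print item
#
#   producer = ItemQueueProducer(queue_object)
#   producer.ProduceItems([u'item1', u'item2'])
#   producer.SignalEndOfInput()
#   PrintingConsumer(queue_object).ConsumeItems()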
+366
View File
@@ -0,0 +1,366 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The single process processing engine."""
import collections
import logging
import pdb
from plaso.engine import collector
from plaso.engine import engine
from plaso.engine import queue
from plaso.engine import worker
from plaso.lib import errors
from plaso.parsers import context as parsers_context
class SingleProcessCollector(collector.Collector):
"""Class that implements a single process collector object."""
def __init__(
self, process_queue, source_path, source_path_spec,
resolver_context=None):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
source_path: Path of the source file or directory.
source_path_spec: The source path specification (instance of
dfvfs.PathSpec) as determined by the file system
scanner.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None.
"""
super(SingleProcessCollector, self).__init__(
process_queue, source_path, source_path_spec,
resolver_context=resolver_context)
self._extraction_worker = None
self._fs_collector = SingleProcessFileSystemCollector(process_queue)
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
while not self._queue.IsEmpty():
logging.debug(u'Extraction worker started.')
self._extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
def SetExtractionWorker(self, extraction_worker):
"""Sets the extraction worker.
Args:
extraction_worker: the extraction worker object (instance of
EventExtractionWorker).
"""
self._extraction_worker = extraction_worker
self._fs_collector.SetExtractionWorker(extraction_worker)
class SingleProcessEngine(engine.BaseEngine):
"""Class that defines the single process engine."""
def __init__(self, maximum_number_of_queued_items=0):
"""Initialize the single process engine object.
Args:
maximum_number_of_queued_items: The maximum number of queued items.
The default is 0, which represents
no limit.
"""
collection_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
storage_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
parse_error_queue = SingleProcessQueue(
maximum_number_of_queued_items=maximum_number_of_queued_items)
super(SingleProcessEngine, self).__init__(
collection_queue, storage_queue, parse_error_queue)
self._event_queue_producer = SingleProcessItemQueueProducer(storage_queue)
self._parse_error_queue_producer = SingleProcessItemQueueProducer(
parse_error_queue)
def CreateCollector(
self, include_directory_stat, vss_stores=None, filter_find_specs=None,
resolver_context=None):
"""Creates a collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
include_directory_stat: Boolean value to indicate whether directory
stat information should be collected.
vss_stores: Optional list of VSS stores to include in the collection,
where 1 represents the first store. Set to None if no
VSS stores should be processed. The default is None.
filter_find_specs: Optional list of filter find specifications (instances
of dfvfs.FindSpec). The default is None.
resolver_context: Optional resolver context (instance of dfvfs.Context).
The default is None. Note that every thread or process
must have its own resolver context.
Returns:
A collector object (instance of Collector).
Raises:
RuntimeError: if source path specification is not set.
"""
if not self._source_path_spec:
raise RuntimeError(u'Missing source.')
collector_object = SingleProcessCollector(
self._collection_queue, self._source, self._source_path_spec,
resolver_context=resolver_context)
collector_object.SetCollectDirectoryMetadata(include_directory_stat)
if vss_stores:
collector_object.SetVssInformation(vss_stores)
if filter_find_specs:
collector_object.SetFilter(filter_find_specs)
return collector_object
def CreateExtractionWorker(self, worker_number):
"""Creates an extraction worker object.
Args:
worker_number: A number that identifies the worker.
Returns:
An extraction worker (instance of worker.ExtractionWorker).
"""
parser_context = parsers_context.ParserContext(
self._event_queue_producer, self._parse_error_queue_producer,
self.knowledge_base)
extraction_worker = SingleProcessEventExtractionWorker(
worker_number, self._collection_queue, self._event_queue_producer,
self._parse_error_queue_producer, parser_context)
extraction_worker.SetEnableDebugOutput(self._enable_debug_output)
# TODO: move profiler in separate object.
extraction_worker.SetEnableProfiling(
self._enable_profiling,
profiling_sample_rate=self._profiling_sample_rate)
if self._open_files:
extraction_worker.SetOpenFiles(self._open_files)
if self._filter_object:
extraction_worker.SetFilterObject(self._filter_object)
if self._mount_path:
extraction_worker.SetMountPath(self._mount_path)
if self._text_prepend:
extraction_worker.SetTextPrepend(self._text_prepend)
return extraction_worker
def ProcessSource(
self, collector_object, storage_writer, parser_filter_string=None):
"""Processes the source and extracts event objects.
Args:
collector_object: A collector object (instance of Collector).
storage_writer: A storage writer object (instance of BaseStorageWriter).
parser_filter_string: Optional parser filter string. The default is None.
"""
extraction_worker = self.CreateExtractionWorker(0)
extraction_worker.InitalizeParserObjects(
parser_filter_string=parser_filter_string)
# Set the extraction worker and storage writer values so that they
# can be accessed if the QueueFull exception is raised. This is
# needed in single process mode to prevent the queue consuming too
# much memory.
collector_object.SetExtractionWorker(extraction_worker)
self._event_queue_producer.SetStorageWriter(storage_writer)
self._parse_error_queue_producer.SetStorageWriter(storage_writer)
logging.debug(u'Processing started.')
logging.debug(u'Collection started.')
collector_object.Collect()
logging.debug(u'Collection stopped.')
logging.debug(u'Extraction worker started.')
extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
self._event_queue_producer.SignalEndOfInput()
logging.debug(u'Storage writer started.')
storage_writer.WriteEventObjects()
logging.debug(u'Storage writer stopped.')
# Reset the extraction worker and storage writer values to return
# the objects in their original state. This will prevent access
# to the extraction worker outside this function and allow it
# to be garbage collected.
self._event_queue_producer.SetStorageWriter(None)
self._parse_error_queue_producer.SetStorageWriter(None)
collector_object.SetExtractionWorker(None)
logging.debug(u'Processing completed.')
class SingleProcessEventExtractionWorker(worker.BaseEventExtractionWorker):
"""Class that defines the single process event extraction worker."""
def _DebugParseFileEntry(self):
"""Callback for debugging file entry parsing failures."""
pdb.post_mortem()
class SingleProcessFileSystemCollector(collector.FileSystemCollector):
"""Class that implements a single process file system collector object."""
def __init__(self, process_queue):
"""Initializes the collector object.
The collector discovers all the files that need to be processed by
the workers. Once a file is discovered it is added to the process queue
as a path specification (instance of dfvfs.PathSpec).
Args:
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
"""
super(SingleProcessFileSystemCollector, self).__init__(process_queue)
self._extraction_worker = None
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
while not self._queue.IsEmpty():
logging.debug(u'Extraction worker started.')
self._extraction_worker.Run()
logging.debug(u'Extraction worker stopped.')
def SetExtractionWorker(self, extraction_worker):
"""Sets the extraction worker.
Args:
extraction_worker: the extraction worker object (instance of
EventExtractionWorker).
"""
self._extraction_worker = extraction_worker
class SingleProcessItemQueueProducer(queue.ItemQueueProducer):
"""Class that implements a single process item queue producer."""
def __init__(self, queue_object):
"""Initializes the queue producer.
Args:
queue_object: the queue object (instance of Queue).
"""
super(SingleProcessItemQueueProducer, self).__init__(queue_object)
self._storage_writer = None
def _FlushQueue(self):
"""Flushes the queue callback for the QueueFull exception."""
logging.debug(u'Storage writer started.')
self._storage_writer.WriteEventObjects()
logging.debug(u'Storage writer stopped.')
def SetStorageWriter(self, storage_writer):
"""Sets the storage writer.
Args:
storage_writer: the storage writer object (instance of
BaseStorageWriter).
"""
self._storage_writer = storage_writer
class SingleProcessQueue(queue.Queue):
"""Single process queue."""
def __init__(self, maximum_number_of_queued_items=0):
"""Initializes a single process queue object.
Args:
maximum_number_of_queued_items: The maximum number of queued items.
The default is 0, which represents
no limit.
"""
super(SingleProcessQueue, self).__init__()
# The Queue interface defines the maximum number of queued items to be
# 0 if unlimited as does the multi processing queue, but deque uses
# None to indicate no limit.
if maximum_number_of_queued_items == 0:
maximum_number_of_queued_items = None
# maxlen contains the maximum number of items allowed to be queued,
# where None represents unlimited.
self._queue = collections.deque(
maxlen=maximum_number_of_queued_items)
def __len__(self):
"""Returns the estimated current number of items in the queue."""
return len(self._queue)
def IsEmpty(self):
"""Determines if the queue is empty."""
return len(self._queue) == 0
def PushItem(self, item):
"""Pushes an item onto the queue.
Raises:
QueueFull: when the queue is full.
"""
number_of_items = len(self._queue)
# Deque will drop the first item in the queue when maxlen is exceeded.
if not self._queue.maxlen or number_of_items < self._queue.maxlen:
self._queue.append(item)
number_of_items += 1
if self._queue.maxlen and number_of_items == self._queue.maxlen:
raise errors.QueueFull
def PopItem(self):
"""Pops an item off the queue.
Raises:
QueueEmpty: when the queue is empty.
"""
try:
# Using popleft to have FIFO behavior.
return self._queue.popleft()
except IndexError:
raise errors.QueueEmpty
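# Usage sketch (illustrative, not part of the original module): the queue
# behaves as a FIFO and raises QueueFull once the maximum size is reached;
# note that the item that fills the queue is still appended:
#
#   queue_object = SingleProcessQueue(maximum_number_of_queued_items=2)
#   queue_object.PushItem(u'first')
#   try:
#     queue_object.PushItem(u'second')
#   except errors.QueueFull:
#     pass
#   print queue_object.PopItem()
#
# This prints: first, since PopItem pops from the front of the queue.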
+133
View File
@@ -0,0 +1,133 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests the single process processing engine."""
import os
import unittest
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.helpers import file_system_searcher
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context
from plaso.engine import single_process
from plaso.engine import test_lib
from plaso.lib import errors
class SingleProcessQueueTest(unittest.TestCase):
"""Tests the single process queue."""
_ITEMS = frozenset(['item1', 'item2', 'item3', 'item4'])
def testPushPopItem(self):
"""Tests the PushItem and PopItem functions."""
test_queue = single_process.SingleProcessQueue()
for item in self._ITEMS:
test_queue.PushItem(item)
self.assertEquals(len(test_queue), len(self._ITEMS))
test_queue.SignalEndOfInput()
test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
test_queue_consumer.ConsumeItems()
expected_number_of_items = len(self._ITEMS)
self.assertEquals(
test_queue_consumer.number_of_items, expected_number_of_items)
def testQueueEmpty(self):
"""Tests the queue raises the QueueEmpty exception."""
test_queue = single_process.SingleProcessQueue()
with self.assertRaises(errors.QueueEmpty):
test_queue.PopItem()
def testQueueFull(self):
"""Tests the queue raises the QueueFull exception."""
test_queue = single_process.SingleProcessQueue(
maximum_number_of_queued_items=5)
for item in self._ITEMS:
test_queue.PushItem(item)
with self.assertRaises(errors.QueueFull):
test_queue.PushItem('item5')
with self.assertRaises(errors.QueueFull):
test_queue.PushItem('item6')
test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
test_queue_consumer.ConsumeItems()
expected_number_of_items = len(self._ITEMS)
self.assertEquals(
test_queue_consumer.number_of_items, expected_number_of_items + 1)
class SingleProcessEngineTest(unittest.TestCase):
"""Tests for the engine object."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data')
def testEngine(self):
"""Test the engine functionality."""
resolver_context = context.Context()
test_engine = single_process.SingleProcessEngine(
maximum_number_of_queued_items=25000)
self.assertNotEquals(test_engine, None)
source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
os_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
source_path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=os_path_spec)
test_engine.SetSource(source_path_spec, resolver_context=resolver_context)
self.assertFalse(test_engine.SourceIsDirectory())
self.assertFalse(test_engine.SourceIsFile())
self.assertTrue(test_engine.SourceIsStorageMediaImage())
test_searcher = test_engine.GetSourceFileSystemSearcher(
resolver_context=resolver_context)
self.assertNotEquals(test_searcher, None)
self.assertIsInstance(
test_searcher, file_system_searcher.FileSystemSearcher)
test_engine.PreprocessSource('Windows')
test_collector = test_engine.CreateCollector(
False, vss_stores=None, filter_find_specs=None,
resolver_context=resolver_context)
self.assertNotEquals(test_collector, None)
self.assertIsInstance(
test_collector, single_process.SingleProcessCollector)
test_extraction_worker = test_engine.CreateExtractionWorker(0)
self.assertNotEquals(test_extraction_worker, None)
self.assertIsInstance(
test_extraction_worker,
single_process.SingleProcessEventExtractionWorker)
if __name__ == '__main__':
unittest.main()
+71
View File
@@ -0,0 +1,71 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine related functions and classes for testing."""
import os
import unittest
from plaso.engine import queue
class TestQueueConsumer(queue.ItemQueueConsumer):
"""Class that implements the test queue consumer.
The queue consumer subscribes to updates on the queue.
"""
def __init__(self, test_queue):
"""Initializes the queue consumer.
Args:
test_queue: the test queue (instance of Queue).
"""
super(TestQueueConsumer, self).__init__(test_queue)
self.items = []
def _ConsumeItem(self, item):
"""Consumes an item callback for ConsumeItems."""
self.items.append(item)
@property
def number_of_items(self):
"""The number of items."""
return len(self.items)
class EngineTestCase(unittest.TestCase):
"""The unit test case for a front-end."""
_TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data')
# Show full diff results, part of TestCase so does not follow our naming
# conventions.
maxDiff = None
def _GetTestFilePath(self, path_segments):
"""Retrieves the path of a test file relative to the test data directory.
Args:
path_segments: the path segments inside the test data directory.
Returns:
A path of the test file.
"""
# Note that we need to pass the individual path segments to os.path.join
# and not a list.
return os.path.join(self._TEST_DATA_PATH, *path_segments)
+75
View File
@@ -0,0 +1,75 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine utility functions."""
import logging
from dfvfs.helpers import file_system_searcher
from plaso.winreg import path_expander
def BuildFindSpecsFromFile(filter_file_path, pre_obj=None):
"""Returns a list of find specification from a filter file.
Args:
filter_file_path: A path to a file that contains find specifications.
pre_obj: A preprocessing object (instance of PreprocessObject). This is
optional but when provided takes care of expanding each segment.
"""
find_specs = []
if pre_obj:
expander = path_expander.WinRegistryKeyPathExpander()
with open(filter_file_path, 'rb') as file_object:
for line in file_object:
line = line.strip()
if line.startswith(u'#'):
continue
if pre_obj:
try:
line = expander.ExpandPath(line, pre_obj=pre_obj)
except KeyError as exception:
logging.error((
u'Unable to use collection filter line: {0:s} with error: '
u'{1:s}').format(line, exception))
continue
if not line.startswith(u'/'):
logging.warning((
u'The filter string must be defined as an absolute path: '
u'{0:s}').format(line))
continue
_, _, file_path = line.rstrip().rpartition(u'/')
if not file_path:
logging.warning(
u'Unable to parse the filter string: {0:s}'.format(line))
continue
# Convert the filter paths into a list of path segments and strip
# the root path segment.
path_segments = line.split(u'/')
path_segments.pop(0)
find_specs.append(file_system_searcher.FindSpec(
location_regex=path_segments, case_sensitive=False))
return find_specs
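# Usage sketch (illustrative, not part of the original module): a filter
# file contains one absolute path per line, where every path segment is
# matched as a case insensitive regular expression and lines starting
# with # are ignored, e.g. a filter file containing:
#
#   /AUTHORS
#   /test_data/.+evtx
#
# is turned into find specifications with:
#
#   find_specs = BuildFindSpecsFromFile(u'/tmp/filter.txt')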
+352
View File
@@ -0,0 +1,352 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The event extraction worker."""
import logging
import os
from dfvfs.resolver import context
from dfvfs.resolver import resolver as path_spec_resolver
try:
from guppy import hpy
except ImportError:
hpy = None
from plaso.engine import classifier
from plaso.engine import queue
from plaso.lib import errors
from plaso.parsers import manager as parsers_manager
class BaseEventExtractionWorker(queue.ItemQueueConsumer):
"""Class that defines the event extraction worker base.
This class is designed to watch a queue for path specifications of files
and directories (file entries) for which events need to be extracted.
The event extraction worker needs to determine if a parser suitable
for parsing a particular file is available. All extracted event objects
are pushed on a storage queue for further processing.
"""
def __init__(
self, identifier, process_queue, event_queue_producer,
parse_error_queue_producer, parser_context):
"""Initializes the event extraction worker object.
Args:
identifier: The identifier, usually an incrementing integer.
process_queue: The process queue (instance of Queue). This queue contains
the file entries that need to be processed.
event_queue_producer: The event object queue producer (instance of
ItemQueueProducer).
parse_error_queue_producer: The parse error queue producer (instance of
ItemQueueProducer).
parser_context: A parser context object (instance of ParserContext).
"""
super(BaseEventExtractionWorker, self).__init__(process_queue)
self._enable_debug_output = False
self._identifier = identifier
self._open_files = False
self._parser_context = parser_context
self._filestat_parser_object = None
self._parser_objects = None
# We need a resolver context per process to prevent multi processing
# issues with file objects stored in images.
self._resolver_context = context.Context()
self._event_queue_producer = event_queue_producer
self._parse_error_queue_producer = parse_error_queue_producer
# Attributes that contain the current status of the worker.
self._current_working_file = u''
self._is_running = False
# Attributes for profiling.
self._enable_profiling = False
self._heapy = None
self._profiling_sample = 0
self._profiling_sample_rate = 1000
self._profiling_sample_file = u'{0!s}.hpy'.format(self._identifier)
def _ConsumeItem(self, path_spec):
"""Consumes an item callback for ConsumeItems.
Args:
path_spec: a path specification (instance of dfvfs.PathSpec).
"""
file_entry = path_spec_resolver.Resolver.OpenFileEntry(
path_spec, resolver_context=self._resolver_context)
if file_entry is None:
logging.warning(u'Unable to open file entry: {0:s}'.format(
path_spec.comparable))
return
try:
self.ParseFileEntry(file_entry)
except IOError as exception:
logging.warning(u'Unable to parse file: {0:s} with error: {1:s}'.format(
path_spec.comparable, exception))
def _DebugParseFileEntry(self):
"""Callback for debugging file entry parsing failures."""
return
def _ParseFileEntryWithParser(self, parser_object, file_entry):
"""Parses a file entry with a specific parser.
Args:
parser_object: A parser object (instance of BaseParser).
file_entry: A file entry object (instance of dfvfs.FileEntry).
Raises:
QueueFull: If a queue is full.
"""
try:
parser_object.Parse(self._parser_context, file_entry)
except errors.UnableToParseFile as exception:
logging.debug(u'Not a {0:s} file ({1:s}) - {2:s}'.format(
parser_object.NAME, file_entry.name, exception))
except errors.QueueFull:
raise
except IOError as exception:
logging.debug(
u'[{0:s}] Unable to parse: {1:s} with error: {2:s}'.format(
parser_object.NAME, file_entry.path_spec.comparable,
exception))
# Casting a wide net, catching all exceptions. Done to keep the worker
# running, despite the parser hitting errors, so the worker doesn't die
# if a single file is corrupted or there is a bug in a parser.
except Exception as exception:
logging.warning(
u'[{0:s}] Unable to process file: {1:s} with error: {2:s}.'.format(
parser_object.NAME, file_entry.path_spec.comparable,
exception))
logging.debug(
u'The path specification that caused the error: {0:s}'.format(
file_entry.path_spec.comparable))
logging.exception(exception)
if self._enable_debug_output:
self._DebugParseFileEntry()
def _ProfilingStart(self):
"""Starts the profiling."""
self._heapy.setrelheap()
self._profiling_sample = 0
try:
os.remove(self._profiling_sample_file)
except OSError:
pass
def _ProfilingStop(self):
"""Stops the profiling."""
self._ProfilingWriteSample()
def _ProfilingUpdate(self):
"""Updates the profiling."""
self._profiling_sample += 1
if self._profiling_sample >= self._profiling_sample_rate:
self._ProfilingWriteSample()
self._profiling_sample = 0
def _ProfilingWriteSample(self):
"""Writes a profiling sample to the sample file."""
heap = self._heapy.heap()
heap.dump(self._profiling_sample_file)
def GetStatus(self):
"""Returns a status dictionary."""
return {
'is_running': self._is_running,
'identifier': u'Worker_{0:d}'.format(self._identifier),
'current_file': self._current_working_file,
'counter': self._parser_context.number_of_events}
def InitalizeParserObjects(self, parser_filter_string=None):
"""Initializes the parser objects.
The parser_filter_string is a simple comma separated value string that
denotes a list of parser names to include and/or exclude. Each entry
can have the value of:
+ Exact match of a list of parsers, or a preset (see
plaso/frontend/presets.py for a full list of available presets).
+ A name of a single parser (case insensitive), e.g. msiecfparser.
+ A glob name for a single parser, e.g. '*msie*' (case insensitive).
Args:
parser_filter_string: Optional parser filter string. The default is None.
"""
self._parser_objects = parsers_manager.ParsersManager.GetParserObjects(
parser_filter_string=parser_filter_string)
for parser_object in self._parser_objects:
if parser_object.NAME == 'filestat':
self._filestat_parser_object = parser_object
break
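# Illustrative sketch (not part of the original module): initializing the
# parser objects with a filter string, using the filestat parser name that
# appears above; worker_object is a placeholder for a worker instance:
#
#   worker_object.InitalizeParserObjects(parser_filter_string=u'filestat')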
def ParseFileEntry(self, file_entry):
"""Parses a file entry.
Args:
file_entry: A file entry object (instance of dfvfs.FileEntry).
"""
logging.debug(u'[ParseFileEntry] Parsing: {0:s}'.format(
file_entry.path_spec.comparable))
self._current_working_file = getattr(
file_entry.path_spec, u'location', file_entry.name)
if file_entry.IsDirectory() and self._filestat_parser_object:
self._ParseFileEntryWithParser(self._filestat_parser_object, file_entry)
elif file_entry.IsFile():
# TODO: Not go through all parsers, just the ones
# that the classifier classifies the file as.
for parser_object in self._parser_objects:
logging.debug(u'Trying to parse: {0:s} with parser: {1:s}'.format(
file_entry.name, parser_object.NAME))
self._ParseFileEntryWithParser(parser_object, file_entry)
logging.debug(u'[ParseFileEntry] Done parsing: {0:s}'.format(
file_entry.path_spec.comparable))
if self._enable_profiling:
self._ProfilingUpdate()
if self._open_files:
try:
for sub_file_entry in classifier.Classifier.SmartOpenFiles(file_entry):
if self._abort:
break
self.ParseFileEntry(sub_file_entry)
except IOError as exception:
logging.warning(
u'Unable to parse file: {0:s} with error: {1:s}'.format(
file_entry.path_spec.comparable, exception))
def Run(self):
"""Extracts event objects from file entries."""
self._parser_context.ResetCounters()
if self._enable_profiling:
self._ProfilingStart()
self._is_running = True
logging.info(
u'Worker {0:d} (PID: {1:d}) started monitoring process queue.'.format(
self._identifier, os.getpid()))
self.ConsumeItems()
logging.info(
u'Worker {0:d} (PID: {1:d}) stopped monitoring process queue.'.format(
self._identifier, os.getpid()))
self._current_working_file = u''
self._is_running = False
if self._enable_profiling:
self._ProfilingStop()
self._resolver_context.Empty()
def SetEnableDebugOutput(self, enable_debug_output):
"""Enables or disables debug output.
Args:
enable_debug_output: boolean value to indicate if the debug output
should be enabled.
"""
self._enable_debug_output = enable_debug_output
def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
"""Enables or disables profiling.
Args:
enable_profiling: boolean value to indicate if profiling should be
enabled.
profiling_sample_rate: optional integer indicating the profiling sample
rate. The value contains the number of files
processed. The default value is 1000.
"""
if hpy:
self._enable_profiling = enable_profiling
self._profiling_sample_rate = profiling_sample_rate
if self._enable_profiling and not self._heapy:
self._heapy = hpy()
def SetFilterObject(self, filter_object):
"""Sets the filter object.
Args:
filter_object: the filter object (instance of objectfilter.Filter).
"""
self._parser_context.SetFilterObject(filter_object)
def SetMountPath(self, mount_path):
"""Sets the mount path.
Args:
mount_path: string containing the mount path.
"""
self._parser_context.SetMountPath(mount_path)
# TODO: rename this mode.
def SetOpenFiles(self, open_files):
"""Sets the open files mode.
Args:
open_files: boolean value to indicate if the worker should scan for
file entries inside files.
"""
self._open_files = open_files
def SetTextPrepend(self, text_prepend):
"""Sets the text prepend.
Args:
text_prepend: string that contains the text to prepend to every
event object.
"""
self._parser_context.SetTextPrepend(text_prepend)
def SignalAbort(self):
"""Signals the worker to abort."""
super(BaseEventExtractionWorker, self).SignalAbort()
self._parser_context.SignalAbort()
@classmethod
def SupportsProfiling(cls):
"""Returns a boolean value to indicate if profiling is supported."""
return hpy is not None
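# Illustrative usage sketch (not part of the original module): how a
# front-end might configure and drive this worker. The constructor
# arguments and queue objects shown here are assumptions for the example,
# not the actual engine wiring.
#
#   worker = BaseEventExtractionWorker(
#       0, process_queue, event_queue_producer, parse_error_queue_producer,
#       parser_context)
#   worker.InitializeParserObjects(parser_filter_string=u'*msie*')
#   if BaseEventExtractionWorker.SupportsProfiling():
#     worker.SetEnableProfiling(True, profiling_sample_rate=500)
#   worker.SetOpenFiles(True)
#   worker.Run()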
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+92
View File
@@ -0,0 +1,92 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file is the template for Plist events."""
from plaso.events import time_events
from plaso.lib import eventdata
class PlistEvent(time_events.PythonDatetimeEvent):
"""Convenience class for a plist events."""
DATA_TYPE = 'plist:key'
def __init__(self, root, key, timestamp, desc=None, host=None, user=None):
"""Template for creating a Plist EventObject for returning data to Plaso.
All events extracted from files get passed around Plaso internally as an
EventObject. PlistEvent is an EventObject with attributes specifically
relevant to data extracted from a Plist file. The attribute DATA_TYPE
'plist:key' allows the formatter used during output to identify
the appropriate formatter for converting these attributes to output.
Args:
root: A string representing the path from the root to this key.
key: A string representing the name of key.
timestamp: The date object (instance of datetime.datetime).
desc: An optional string intended for the user describing the event.
host: An optional host name if one is available within the log file.
user: An optional user name if one is available within the log file.
"""
super(PlistEvent, self).__init__(
timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
self.root = root
self.key = key
if desc:
self.desc = desc
if host:
self.hostname = host
if user:
self.username = user
class PlistTimeEvent(time_events.TimestampEvent):
"""Convenience class for a plist event that does not use datetime objects."""
DATA_TYPE = 'plist:key'
def __init__(self, root, key, timestamp, desc=None, host=None, user=None):
"""Template for creating a Plist EventObject for returning data to Plaso.
All events extracted from files get passed around Plaso internally as an
EventObject. PlistTimeEvent is an EventObject with attributes specifically
relevant to data extracted from a Plist file. The attribute DATA_TYPE
'plist:key' allows the formatter used during output to identify
the appropriate formatter for converting these attributes to output.
Args:
root: A string representing the path from the root to this key.
key: A string representing the name of key.
timestamp: The timestamp time value. The timestamp contains the
number of microseconds since Jan 1, 1970 00:00:00 UTC.
desc: An optional string intended for the user describing the event.
host: An optional host name if one is available within the log file.
user: An optional user name if one is available within the log file.
"""
super(PlistTimeEvent, self).__init__(
timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
self.root = root
self.key = key
if desc:
self.desc = desc
if host:
self.hostname = host
if user:
self.username = user
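# Illustrative usage sketch (not part of the original module): constructing
# a PlistEvent from a parsed plist key. The root, key and datetime values
# are made up for the example.
#
#   import datetime
#
#   last_updated = datetime.datetime(2013, 7, 1, 12, 0, 0)
#   event_object = PlistEvent(
#       u'/DeviceCache/44-00-00-00-00-00', u'LastInquiryUpdate',
#       last_updated, desc=u'Last inquiry update')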
+50
View File
@@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the shell item specific event object classes."""
from plaso.events import time_events
class ShellItemFileEntryEvent(time_events.FatDateTimeEvent):
"""Convenience class for a shell item file entry event."""
DATA_TYPE = 'windows:shell_item:file_entry'
def __init__(
self, fat_date_time, usage, name, long_name, localized_name,
file_reference, origin):
"""Initializes an event object.
Args:
fat_date_time: The FAT date time value.
usage: The description of the usage of the time value.
name: A string containing the name of the file entry shell item.
long_name: A string containing the long name of the file entry shell item.
localized_name: A string containing the localized name of the file entry
shell item.
file_reference: A string containing the NTFS file reference
(MFT entry - sequence number).
origin: A string containing the origin of the event (event source).
"""
super(ShellItemFileEntryEvent, self).__init__(fat_date_time, usage)
self.name = name
self.long_name = long_name
self.localized_name = localized_name
self.file_reference = file_reference
self.origin = origin
+48
View File
@@ -0,0 +1,48 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the text format specific event object classes."""
from plaso.events import time_events
from plaso.lib import eventdata
class TextEvent(time_events.TimestampEvent):
"""Convenience class for a text format-based event."""
DATA_TYPE = 'text:entry'
def __init__(self, timestamp, offset, attributes):
"""Initializes a text event object.
Args:
timestamp: The timestamp time value. The timestamp contains the
number of microseconds since Jan 1, 1970 00:00:00 UTC.
offset: The offset of the attributes.
attributes: A dict that contains the event's attributes.
"""
super(TextEvent, self).__init__(
timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
self.offset = offset
for name, value in attributes.iteritems():
# TODO: Revisit these constraints and see if we can implement
# them using a more sane solution.
if isinstance(value, basestring) and not value:
continue
setattr(self, name, value)
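# Illustrative usage sketch (not part of the original module): empty string
# attribute values are skipped, so only non-empty values are set on the
# event. The timestamp and attribute values are made up for the example.
#
#   attributes = {u'hostname': u'myhost', u'username': u''}
#   event_object = TextEvent(1354563195000000, 0, attributes)
#   # event_object.hostname == u'myhost'; no username attribute is set
#   # because the empty string value was skipped.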
+157
View File
@@ -0,0 +1,157 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the time-based event object classes."""
from plaso.lib import event
from plaso.lib import timelib
class TimestampEvent(event.EventObject):
"""Convenience class for a timestamp-based event."""
def __init__(self, timestamp, usage, data_type=None):
"""Initializes an event object.
Args:
timestamp: The timestamp value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(TimestampEvent, self).__init__()
self.timestamp = timestamp
self.timestamp_desc = usage
if data_type:
self.data_type = data_type
class CocoaTimeEvent(TimestampEvent):
"""Convenience class for a Cocoa time-based event."""
def __init__(self, cocoa_time, usage, data_type=None):
"""Initializes an event object.
Args:
cocoa_time: The Cocoa time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(CocoaTimeEvent, self).__init__(
timelib.Timestamp.FromCocoaTime(cocoa_time), usage,
data_type=data_type)
class FatDateTimeEvent(TimestampEvent):
"""Convenience class for a FAT date time-based event."""
def __init__(self, fat_date_time, usage, data_type=None):
"""Initializes an event object.
Args:
fat_date_time: The FAT date time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(FatDateTimeEvent, self).__init__(
timelib.Timestamp.FromFatDateTime(fat_date_time), usage,
data_type=data_type)
class FiletimeEvent(TimestampEvent):
"""Convenience class for a FILETIME timestamp-based event."""
def __init__(self, filetime, usage, data_type=None):
"""Initializes an event object.
Args:
filetime: The FILETIME timestamp value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(FiletimeEvent, self).__init__(
timelib.Timestamp.FromFiletime(filetime), usage, data_type=data_type)
class JavaTimeEvent(TimestampEvent):
"""Convenience class for a Java time-based event."""
def __init__(self, java_time, usage, data_type=None):
"""Initializes an event object.
Args:
java_time: The Java time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(JavaTimeEvent, self).__init__(
timelib.Timestamp.FromJavaTime(java_time), usage, data_type=data_type)
class PosixTimeEvent(TimestampEvent):
"""Convenience class for a POSIX time-based event."""
def __init__(self, posix_time, usage, data_type=None):
"""Initializes an event object.
Args:
posix_time: The POSIX time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(PosixTimeEvent, self).__init__(
timelib.Timestamp.FromPosixTime(posix_time), usage, data_type=data_type)
class PythonDatetimeEvent(TimestampEvent):
"""Convenience class for a Python DateTime time-based event."""
def __init__(self, datetime_time, usage, data_type=None):
"""Initializes an event object.
Args:
datetime_time: The datetime object (instance of datetime.datetime).
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(PythonDatetimeEvent, self).__init__(
timelib.Timestamp.FromPythonDatetime(datetime_time), usage,
data_type=data_type)
class WebKitTimeEvent(TimestampEvent):
"""Convenience class for a WebKit time-based event."""
def __init__(self, webkit_time, usage, data_type=None):
"""Initializes an event object.
Args:
webkit_time: The WebKit time value.
usage: The description of the usage of the time value.
data_type: Optional event data type. If not set data_type is
derived from the DATA_TYPE attribute.
"""
super(WebKitTimeEvent, self).__init__(
timelib.Timestamp.FromWebKitTime(webkit_time), usage,
data_type=data_type)
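# Illustrative usage sketch (not part of the original module): each
# convenience subclass converts its native time value into the internal
# timestamp, here a POSIX time of 2012-01-01 00:00:00 UTC. The usage
# description constant comes from plaso.lib.eventdata, which this module
# itself does not import.
#
#   from plaso.lib import eventdata
#
#   event_object = PosixTimeEvent(
#       1325376000, eventdata.EventTimestamp.WRITTEN_TIME,
#       data_type=u'text:entry')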
+95
View File
@@ -0,0 +1,95 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the Windows specific event object classes."""
from plaso.events import time_events
from plaso.lib import eventdata
class WindowsVolumeCreationEvent(time_events.FiletimeEvent):
"""Convenience class for a Windows volume creation event."""
DATA_TYPE = 'windows:volume:creation'
def __init__(self, filetime, device_path, serial_number, origin):
"""Initializes an event object.
Args:
filetime: The FILETIME timestamp value.
device_path: A string containing the volume device path.
serial_number: A string containing the volume serial number.
origin: A string containing the origin of the event (event source).
"""
super(WindowsVolumeCreationEvent, self).__init__(
filetime, eventdata.EventTimestamp.CREATION_TIME)
self.device_path = device_path
self.serial_number = serial_number
self.origin = origin
class WindowsRegistryEvent(time_events.TimestampEvent):
"""Convenience class for a Windows Registry-based event."""
DATA_TYPE = 'windows:registry:key_value'
def __init__(
self, timestamp, key_name, value_dict, usage=None, offset=None,
registry_type=None, urls=None, source_append=None):
"""Initializes a Windows registry event.
Args:
timestamp: The timestamp time value. The timestamp contains the
number of microseconds since Jan 1, 1970 00:00:00 UTC.
key_name: The name of the Registry key being parsed.
value_dict: The interpreted value of the key, stored as a dictionary.
usage: Optional description of the usage of the time value.
The default is None.
offset: Optional (data) offset of the Registry key or value.
The default is None.
registry_type: Optional Registry type string. The default is None.
urls: Optional list of URLs. The default is None.
source_append: Optional string to append to the source_long of the event.
The default is None.
"""
if usage is None:
usage = eventdata.EventTimestamp.WRITTEN_TIME
super(WindowsRegistryEvent, self).__init__(timestamp, usage)
if key_name:
self.keyname = key_name
self.regvalue = value_dict
if offset or isinstance(offset, (int, long)):
self.offset = offset
if registry_type:
self.registry_type = registry_type
if urls:
self.url = u' - '.join(urls)
if source_append:
self.source_append = source_append
class WindowsRegistryServiceEvent(WindowsRegistryEvent):
"""Convenience class for service entries retrieved from the registry."""
DATA_TYPE = 'windows:registry:service'
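# Illustrative usage sketch (not part of the original module): creating a
# Registry event from a parsed key. The key name, values and timestamp are
# made up for the example.
#
#   regvalue = {u'Version': u'1.0', u'Owner': u'N/A'}
#   event_object = WindowsRegistryEvent(
#       1354563195000000, u'\\Microsoft\\Windows\\CurrentVersion\\Run',
#       regvalue, offset=0, registry_type=u'SOFTWARE')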
+56
View File
@@ -0,0 +1,56 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains an import statement for each filter."""
import logging
from plaso.filters import dynamic_filter
from plaso.filters import eventfilter
from plaso.filters import filterlist
from plaso.lib import filter_interface
from plaso.lib import errors
def ListFilters():
"""Generate a list of all available filters."""
filters = []
for filter_class in filter_interface.FilterObject.classes.itervalues():
filters.append(filter_class())
return filters
def GetFilter(filter_string):
"""Returns the first filter that matches the filter string.
Args:
filter_string: A filter string for any of the available filters.
Returns:
The first FilterObject found matching the filter string. If no FilterObject
is available for this filter string None is returned.
"""
if not filter_string:
return
for filter_obj in ListFilters():
try:
filter_obj.CompileFilter(filter_string)
return filter_obj
except errors.WrongPlugin:
logging.debug(u'Filterstring [{}] is not a filter: {}'.format(
filter_string, filter_obj.filter_name))
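# Illustrative usage sketch (not part of the original module): resolving a
# filter string to the first filter implementation that can compile it.
# The event_objects list is an assumption for the example.
#
#   filter_object = GetFilter(u'message contains "evil"')
#   if filter_object:
#     matches = [event_object for event_object in event_objects
#                if filter_object.Match(event_object)]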
+162
View File
@@ -0,0 +1,162 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains definition for a selective fields EventObjectFilter."""
from plaso.lib import errors
from plaso.lib import lexer
from plaso.filters import eventfilter
class SelectiveLexer(lexer.Lexer):
"""A simple selective filter lexer implementation."""
tokens = [
lexer.Token('INITIAL', r'SELECT', '', 'FIELDS'),
lexer.Token('FIELDS', r'(.+) WHERE ', 'SetFields', 'FILTER'),
lexer.Token('FIELDS', r'(.+) LIMIT', 'SetFields', 'LIMIT_END'),
lexer.Token('FIELDS', r'(.+) SEPARATED BY', 'SetFields', 'SEPARATE'),
lexer.Token('FIELDS', r'(.+)$', 'SetFields', 'END'),
lexer.Token('FILTER', r'(.+) SEPARATED BY', 'SetFilter', 'SEPARATE'),
lexer.Token('FILTER', r'(.+) LIMIT', 'SetFilter', 'LIMIT_END'),
lexer.Token('FILTER', r'(.+)$', 'SetFilter', 'END'),
lexer.Token('SEPARATE', r' ', '', ''), # Ignore white space here.
lexer.Token('SEPARATE', r'LIMIT', '', 'LIMIT_END'),
lexer.Token(
'SEPARATE', r'[\'"]([^ \'"]+)[\'"] LIMIT', 'SetSeparator',
'LIMIT_END'),
lexer.Token(
'SEPARATE', r'[\'"]([^ \'"]+)[\'"]$', 'SetSeparator', 'END'),
lexer.Token(
'SEPARATE', r'(.+)$', 'SetSeparator', 'END'),
lexer.Token(
'LIMIT_END', r'SEPARATED BY [\'"]([^\'"]+)[\'"]', 'SetSeparator', ''),
lexer.Token('LIMIT_END', r'(.+) SEPARATED BY', 'SetLimit', 'SEPARATE'),
lexer.Token('LIMIT_END', r'(.+)$', 'SetLimit', 'END')]
def __init__(self, data=''):
"""Initialize the lexer."""
self.fields = []
self.limit = 0
self.lex_filter = None
self.separator = u','
super(SelectiveLexer, self).__init__(data)
def SetFilter(self, match, **_):
"""Set the filter query."""
filter_match = match.group(1)
if 'LIMIT' in filter_match:
# This only occurs in the case where we have "LIMIT X SEPARATED BY".
self.lex_filter, _, push_back = filter_match.rpartition('LIMIT')
self.PushBack('LIMIT {} SEPARATED BY '.format(push_back))
else:
self.lex_filter = filter_match
def SetSeparator(self, match, **_):
"""Set the separator of the output, only uses the first char."""
separator = match.group(1)
if separator:
self.separator = separator[0]
def SetLimit(self, match, **_):
"""Set the row limit."""
try:
limit = int(match.group(1))
except ValueError:
self.Error('Invalid limit value, should be int [{}] = {}'.format(
type(match.group(1)), match.group(1)))
limit = 0
self.limit = limit
def SetFields(self, match, **_):
"""Set the selective fields."""
text = match.group(1).lower()
field_text, _, _ = text.partition(' from ')
use_field_text = field_text.replace(' ', '')
if ',' in use_field_text:
self.fields = use_field_text.split(',')
else:
self.fields = [use_field_text]
class DynamicFilter(eventfilter.EventObjectFilter):
"""A twist to the EventObjectFilter allowing output fields to be selected.
This filter is essentially the same as the EventObjectFilter except it wraps
it in a selection of which fields should be included by an output module that
has support for selective fields. That is to say the filter:
SELECT field_a, field_b WHERE attribute contains 'text'
Will use the EventObjectFilter "attribute contains 'text'" and at the same
time indicate to the appropriate output module that the user wants only the
fields field_a and field_b to be used in the output.
"""
@property
def fields(self):
"""Set the fields property."""
return self._fields
@property
def limit(self):
"""Return the limit of row counts."""
return self._limit
@property
def separator(self):
"""Return the separator value."""
return self._separator
def __init__(self):
"""Initialize the selective EventObjectFilter."""
super(DynamicFilter, self).__init__()
self._fields = []
self._limit = 0
self._separator = u','
def CompileFilter(self, filter_string):
"""Compile the filter string into a EventObjectFilter matcher."""
lex = SelectiveLexer(filter_string)
_ = lex.NextToken()
if lex.error:
raise errors.WrongPlugin('Malformed filter string.')
_ = lex.NextToken()
if lex.error:
raise errors.WrongPlugin('No fields defined.')
while lex.state != 'END':
_ = lex.NextToken()
if lex.error:
raise errors.WrongPlugin('No filter defined for DynamicFilter.')
if lex.state != 'END':
raise errors.WrongPlugin(
'Malformed DynamicFilter, end state not reached.')
self._fields = lex.fields
self._limit = lex.limit
self._separator = unicode(lex.separator)
if lex.lex_filter:
super(DynamicFilter, self).CompileFilter(lex.lex_filter)
else:
self.matcher = None
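# Illustrative usage sketch (not part of the original module): compiling a
# selective filter and reading the parsed selection back.
#
#   dynamic_filter = DynamicFilter()
#   dynamic_filter.CompileFilter(
#       u'SELECT date, message WHERE message contains "dude" LIMIT 10')
#   # dynamic_filter.fields == [u'date', u'message']
#   # dynamic_filter.limit == 10
#   # dynamic_filter.separator == u','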
+85
View File
@@ -0,0 +1,85 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the DynamicFilter filter."""
import unittest
from plaso.filters import dynamic_filter
from plaso.filters import test_helper
class DynamicFilterTest(test_helper.FilterTestHelper):
"""Tests for the DynamicFilter filter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self.test_filter = dynamic_filter.DynamicFilter()
def testFilterFail(self):
"""Run few tests that should not be a proper filter."""
self.TestFail('/tmp/file_that_most_likely_does_not_exist')
self.TestFail('some random stuff that is destined to fail')
self.TestFail('some_stuff is "random" and other_stuff ')
self.TestFail('some_stuff is "random" and other_stuff is not "random"')
self.TestFail('SELECT stuff FROM machine WHERE conditions are met')
self.TestFail('SELECT field_a, field_b WHERE ')
self.TestFail('SELECT field_a, field_b SEPARATED BY')
self.TestFail('SELECT field_a, SEPARATED BY field_b WHERE ')
self.TestFail('SELECT field_a, field_b LIMIT WHERE')
def testFilterApprove(self):
self.TestTrue('SELECT stuff FROM machine WHERE some_stuff is "random"')
self.TestTrue('SELECT field_a, field_b, field_c')
self.TestTrue('SELECT field_a, field_b, field_c SEPARATED BY "%"')
self.TestTrue('SELECT field_a, field_b, field_c LIMIT 10')
self.TestTrue('SELECT field_a, field_b, field_c LIMIT 10 SEPARATED BY "|"')
self.TestTrue('SELECT field_a, field_b, field_c SEPARATED BY "|" LIMIT 10')
self.TestTrue('SELECT field_a, field_b, field_c WHERE date > "2012"')
self.TestTrue(
'SELECT field_a, field_b, field_c WHERE date > "2012" LIMIT 100')
self.TestTrue((
'SELECT field_a, field_b, field_c WHERE date > "2012" SEPARATED BY "@"'
' LIMIT 100'))
self.TestTrue((
'SELECT parser, date, time WHERE some_stuff is "random" and '
'date < "2021-02-14 14:51:23"'))
def testFilterFields(self):
query = 'SELECT stuff FROM machine WHERE some_stuff is "random"'
self.test_filter.CompileFilter(query)
self.assertEquals(['stuff'], self.test_filter.fields)
query = 'SELECT stuff, a, b, date FROM machine WHERE some_stuff is "random"'
self.test_filter.CompileFilter(query)
self.assertEquals(['stuff', 'a', 'b', 'date'], self.test_filter.fields)
query = 'SELECT date, message, zone, hostname WHERE some_stuff is "random"'
self.test_filter.CompileFilter(query)
self.assertEquals(['date', 'message', 'zone', 'hostname'],
self.test_filter.fields)
query = 'SELECT hlutir'
self.test_filter.CompileFilter(query)
self.assertEquals(['hlutir'], self.test_filter.fields)
query = 'SELECT hlutir LIMIT 10'
self.test_filter.CompileFilter(query)
self.assertEquals(['hlutir'], self.test_filter.fields)
self.assertEquals(10, self.test_filter.limit)
if __name__ == '__main__':
unittest.main()
+40
View File
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains definition for a simple filter."""
from plaso.lib import errors
from plaso.lib import filter_interface
from plaso.lib import pfilter
class EventObjectFilter(filter_interface.FilterObject):
"""A simple filter using the objectfilter library."""
def CompileFilter(self, filter_string):
"""Compile the filter string into a filter matcher."""
self.matcher = pfilter.GetMatcher(filter_string, True)
if not self.matcher:
raise errors.WrongPlugin('Malformed filter string.')
def Match(self, event_object):
"""Evaluate an EventObject against a filter."""
if not self.matcher:
return True
self._decision = self.matcher.Matches(event_object)
return self._decision
+43
View File
@@ -0,0 +1,43 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the EventObjectFilter filter."""
import unittest
from plaso.filters import test_helper
from plaso.filters import eventfilter
class EventObjectFilterTest(test_helper.FilterTestHelper):
"""Tests for the EventObjectFilter filter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self.test_filter = eventfilter.EventObjectFilter()
def testFilterFail(self):
"""Run few tests that should not be a proper filter."""
self.TestFail('SELECT stuff FROM machine WHERE conditions are met')
self.TestFail('/tmp/file_that_most_likely_does_not_exist')
self.TestFail('some random stuff that is destined to fail')
self.TestFail('some_stuff is "random" and other_stuff ')
def testFilterApprove(self):
self.TestTrue('some_stuff is "random" and other_stuff is not "random"')
if __name__ == '__main__':
unittest.main()
+109
View File
@@ -0,0 +1,109 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains definition for a list of ObjectFilter."""
import os
import yaml
import logging
from plaso.lib import errors
from plaso.lib import filter_interface
from plaso.lib import pfilter
def IncludeKeyword(loader, node):
"""A constructor for the include keyword in YAML."""
filename = loader.construct_scalar(node)
if os.path.isfile(filename):
with open(filename, 'rb') as fh:
try:
data = yaml.safe_load(fh)
except yaml.YAMLError as exception:
logging.error(u'Unable to load rule file with error: {0:s}'.format(
exception))
return None
return data
class ObjectFilterList(filter_interface.FilterObject):
"""A series of Pfilter filters along with metadata."""
def CompileFilter(self, filter_string):
"""Compile a set of ObjectFilters defined in an YAML file."""
if not os.path.isfile(filter_string):
raise errors.WrongPlugin((
'ObjectFilterList requires a YAML file to be passed on, this filter '
'string is not a file.'))
yaml.add_constructor('!include', IncludeKeyword,
Loader=yaml.loader.SafeLoader)
results = None
with open(filter_string, 'rb') as fh:
try:
results = yaml.safe_load(fh)
except (yaml.scanner.ScannerError, IOError) as exception:
raise errors.WrongPlugin(
u'Unable to parse YAML file with error: {0:s}.'.format(exception))
self.filters = []
if type(results) is dict:
self._ParseEntry(results)
elif type(results) is list:
for result in results:
if type(result) is not dict:
raise errors.WrongPlugin(
u'Wrong format of YAML file, entry not a dict ({})'.format(
type(result)))
self._ParseEntry(result)
else:
raise errors.WrongPlugin(
u'Wrong format of YAML file, top level not a dict or list ({})'.format(
type(results)))
def _ParseEntry(self, entry):
"""Parse a single YAML filter entry."""
# A single file with a list of filters to parse.
for name, meta in entry.items():
if 'filter' not in meta:
raise errors.WrongPlugin(
u'Entry inside {} does not contain a filter statement.'.format(
name))
matcher = pfilter.GetMatcher(meta.get('filter'), True)
if not matcher:
raise errors.WrongPlugin(
u'Filter entry [{0:s}] malformed for rule: <{1:s}>'.format(
meta.get('filter'), name))
self.filters.append((name, matcher, meta))
def Match(self, event_object):
"""Evaluate an EventObject against a pfilter."""
if not self.filters:
return True
for name, matcher, meta in self.filters:
self._decision = matcher.Matches(event_object)
if self._decision:
self._reason = u'[{}] {} {}'.format(
name, meta.get('description', 'N/A'), u' - '.join(
meta.get('urls', [])))
return True
return False
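# Illustrative sketch (not part of the original module): a rule file for
# ObjectFilterList. Every top level entry names a rule and must carry a
# "filter" statement; the !include constructor registered above can splice
# in the contents of another YAML rule file. The file name is made up for
# the example.
#
#   Evil_Dude:
#     description: Find evil stuff
#     urls: [mbl.is]
#     filter: message contains "evil"
#   More_Rules: !include extra_rules.yaml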
+98
View File
@@ -0,0 +1,98 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the PFilters filter."""
import os
import logging
import tempfile
import unittest
from plaso.filters import filterlist
from plaso.filters import test_helper
class ObjectFilterTest(test_helper.FilterTestHelper):
"""Tests for the ObjectFilterList filter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self.test_filter = filterlist.ObjectFilterList()
def testFilterFail(self):
"""Run few tests that should not be a proper filter."""
self.TestFail('SELECT stuff FROM machine WHERE conditions are met')
self.TestFail('/tmp/file_that_most_likely_does_not_exist')
self.TestFail('some random stuff that is destined to fail')
self.TestFail('some_stuff is "random" and other_stuff ')
self.TestFail('some_stuff is "random" and other_stuff is not "random"')
def CreateFileAndTest(self, content):
"""Creates a file and then runs the test."""
name = ''
with tempfile.NamedTemporaryFile(delete=False) as file_object:
name = file_object.name
file_object.write(content)
self.TestTrue(name)
try:
os.remove(name)
except (OSError, IOError) as exception:
logging.warning(
u'Unable to remove temporary file: {0:s} with error: {1:s}'.format(
name, exception))
def testFilterApprove(self):
one_rule = u'\n'.join([
u'Again_Dude:',
u' description: Heavy artillery caught on fire',
u' case_nr: 62345',
u' analysts: [anonymous]',
u' urls: [cnn.com,microsoft.com]',
u' filter: message contains "dude where is my car"'])
self.CreateFileAndTest(one_rule)
collection = u'\n'.join([
u'Rule_Dude:',
u' description: This is the very case I talk about, a lot',
u' case_nr: 1235',
u' analysts: [dude, jack, horn]',
u' urls: [mbl.is,visir.is]',
(u' filter: date > "2012-01-01 10:54:13" and parser not contains '
u'"evtx"'),
u'',
u'Again_Dude:',
u' description: Heavy artillery caught on fire',
u' case_nr: 62345',
u' analysts: [smith, perry, john]',
u' urls: [cnn.com,microsoft.com]',
u' filter: message contains "dude where is my car"',
u'',
u'Third_Rule_Of_Thumb:',
u' description: Another ticket for another day.',
u' case_nr: 234',
u' analysts: [joe]',
u' urls: [mbl.is,symantec.com/whereevillies,virustotal.com/myhash]',
u' filter: evil_bit is 1'])
self.CreateFileAndTest(collection)
if __name__ == '__main__':
unittest.main()
+50
View File
@@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains helper function and classes for filters."""
import unittest
from plaso.lib import errors
class FilterTestHelper(unittest.TestCase):
"""A simple class that provides helper functions for testing filters."""
def setUp(self):
"""This should be overwritten."""
self.test_filter = None
def TestTrue(self, query):
"""A quick test that should compile into a valid filter."""
if not self.test_filter:
self.fail(u'No test filter defined.')
try:
self.test_filter.CompileFilter(query)
except errors.WrongPlugin:
# Let the test fail.
self.fail(u'Unable to compile filter: {0:s}'.format(query))
def TestFail(self, query):
"""A quick failure test with a filter."""
if not self.test_filter:
self.fail(u'No test filter defined.')
with self.assertRaises(errors.WrongPlugin):
self.test_filter.CompileFilter(query)
+86
View File
@@ -0,0 +1,86 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains an import statement for each formatter."""
from plaso.formatters import android_app_usage
from plaso.formatters import android_calls
from plaso.formatters import android_sms
from plaso.formatters import appcompatcache
from plaso.formatters import appusage
from plaso.formatters import asl
from plaso.formatters import bencode_parser
from plaso.formatters import bsm
from plaso.formatters import chrome
from plaso.formatters import chrome_cache
from plaso.formatters import chrome_cookies
from plaso.formatters import chrome_extension_activity
from plaso.formatters import cups_ipp
from plaso.formatters import filestat
from plaso.formatters import firefox
from plaso.formatters import firefox_cache
from plaso.formatters import firefox_cookies
from plaso.formatters import ganalytics
from plaso.formatters import gdrive
from plaso.formatters import hachoir
from plaso.formatters import iis
from plaso.formatters import ipod
from plaso.formatters import java_idx
from plaso.formatters import ls_quarantine
from plaso.formatters import mac_appfirewall
from plaso.formatters import mac_document_versions
from plaso.formatters import mac_keychain
from plaso.formatters import mac_securityd
from plaso.formatters import mac_wifi
from plaso.formatters import mackeeper_cache
from plaso.formatters import mactime
from plaso.formatters import mcafeeav
from plaso.formatters import msie_webcache
from plaso.formatters import msiecf
from plaso.formatters import olecf
from plaso.formatters import opera
from plaso.formatters import oxml
from plaso.formatters import pcap
from plaso.formatters import plist
from plaso.formatters import popcontest
from plaso.formatters import pls_recall
from plaso.formatters import recycler
from plaso.formatters import rubanetra
from plaso.formatters import safari
from plaso.formatters import selinux
from plaso.formatters import shell_items
from plaso.formatters import skydrivelog
from plaso.formatters import skydrivelogerr
from plaso.formatters import skype
from plaso.formatters import symantec
from plaso.formatters import syslog
from plaso.formatters import task_scheduler
from plaso.formatters import text
from plaso.formatters import utmp
from plaso.formatters import utmpx
from plaso.formatters import windows
from plaso.formatters import winevt
from plaso.formatters import winevtx
from plaso.formatters import winfirewall
from plaso.formatters import winjob
from plaso.formatters import winlnk
from plaso.formatters import winprefetch
from plaso.formatters import winreg
from plaso.formatters import winregservice
from plaso.formatters import xchatlog
from plaso.formatters import xchatscrollback
from plaso.formatters import zeitgeist
+33
View File
@@ -0,0 +1,33 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Android Application Usage."""
from plaso.formatters import interface
class AndroidApplicationFormatter(interface.ConditionalEventFormatter):
"""Formatter for an Application Last Resumed event."""
DATA_TYPE = 'android:event:last_resume_time'
FORMAT_STRING_PIECES = [
u'Package: {package}',
u'Component: {component}']
SOURCE_LONG = 'Android App Usage'
SOURCE_SHORT = 'LOG'
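# Illustrative sketch (not part of the original module): conditional
# formatters only render the FORMAT_STRING_PIECES whose attributes exist on
# the event object, so an event that carries "package" but no "component"
# would be rendered without the component piece. The event object and its
# attribute value are assumptions for the example.
#
#   formatter = AndroidApplicationFormatter()
#   messages = formatter.GetMessages(event_object)
#   # long message: u'Package: com.example.app'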
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Android contacts2.db database events."""
from plaso.formatters import interface
class AndroidCallFormatter(interface.ConditionalEventFormatter):
"""Formatter for Android call history events."""
DATA_TYPE = 'android:event:call'
FORMAT_STRING_PIECES = [
u'{call_type}',
u'Number: {number}',
u'Name: {name}',
u'Duration: {duration} seconds']
FORMAT_STRING_SHORT_PIECES = [u'{call_type} Call']
SOURCE_LONG = 'Android Call History'
SOURCE_SHORT = 'LOG'
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Android mmssms.db database events."""
from plaso.formatters import interface
class AndroidSmsFormatter(interface.ConditionalEventFormatter):
"""Formatter for Android sms events."""
DATA_TYPE = 'android:messaging:sms'
FORMAT_STRING_PIECES = [
u'Type: {sms_type}',
u'Address: {address}',
u'Status: {sms_read}',
u'Message: {body}']
FORMAT_STRING_SHORT_PIECES = [u'{body}']
SOURCE_LONG = 'Android SMS messages'
SOURCE_SHORT = 'SMS'
+36
View File
@@ -0,0 +1,36 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the AppCompatCache entries inside the Windows Registry."""
from plaso.formatters import interface
class AppCompatCacheFormatter(interface.ConditionalEventFormatter):
"""Formatter for an AppCompatCache Windows Registry entry."""
DATA_TYPE = 'windows:registry:appcompatcache'
FORMAT_STRING_PIECES = [
u'[{keyname}]',
u'Cached entry: {entry_index}',
u'Path: {path}']
FORMAT_STRING_SHORT_PIECES = [u'Path: {path}']
SOURCE_LONG = 'AppCompatCache Registry Entry'
SOURCE_SHORT = 'REG'
+33
View File
@@ -0,0 +1,33 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Mac OS X application usage."""
from plaso.formatters import interface
class ApplicationUsageFormatter(interface.EventFormatter):
"""Define the formatting for Application Usage information."""
DATA_TYPE = 'macosx:application_usage'
FORMAT_STRING = (u'{application} v.{app_version} (bundle: {bundle_id}).'
' Launched: {count} time(s)')
FORMAT_STRING_SHORT = u'{application} ({count} time(s))'
SOURCE_LONG = 'Application Usage'
SOURCE_SHORT = 'LOG'
+47
View File
@@ -0,0 +1,47 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Apple System Log binary files."""
from plaso.formatters import interface
class AslFormatter(interface.ConditionalEventFormatter):
"""Formatter for an ASL log entry."""
DATA_TYPE = 'mac:asl:event'
FORMAT_STRING_PIECES = [
u'MessageID: {message_id}',
u'Level: {level}',
u'User ID: {user_sid}',
u'Group ID: {group_id}',
u'Read User: {read_uid}',
u'Read Group: {read_gid}',
u'Host: {computer_name}',
u'Sender: {sender}',
u'Facility: {facility}',
u'Message: {message}',
u'{extra_information}']
FORMAT_STRING_SHORT_PIECES = [
u'Host: {host}',
u'Sender: {sender}',
u'Facility: {facility}']
SOURCE_LONG = 'ASL entry'
SOURCE_SHORT = 'LOG'
+49
View File
@@ -0,0 +1,49 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for bencode parser events."""
from plaso.formatters import interface
class uTorrentFormatter(interface.ConditionalEventFormatter):
"""Formatter for a BitTorrent uTorrent active torrents."""
DATA_TYPE = 'p2p:bittorrent:utorrent'
SOURCE_LONG = 'uTorrent Active Torrents'
SOURCE_SHORT = 'TORRENT'
FORMAT_STRING_SEPARATOR = u'; '
FORMAT_STRING_PIECES = [u'Torrent {caption}',
u'Saved to {path}',
u'Minutes seeded: {seedtime}']
class TransmissionFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Transmission active torrents."""
DATA_TYPE = 'p2p:bittorrent:transmission'
SOURCE_LONG = 'Transmission Active Torrents'
SOURCE_SHORT = 'TORRENT'
FORMAT_STRING_SEPARATOR = u'; '
FORMAT_STRING_PIECES = [u'Saved to {destination}',
u'Minutes seeded: {seedtime}']
+54
View File
@@ -0,0 +1,54 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Basic Security Module binary files."""
from plaso.formatters import interface
class MacBSMFormatter(interface.ConditionalEventFormatter):
"""Formatter for an BSM log entry."""
DATA_TYPE = 'mac:bsm:event'
FORMAT_STRING_PIECES = [
u'Type: {event_type}',
u'Return: {return_value}',
u'Information: {extra_tokens}']
FORMAT_STRING_SHORT_PIECES = [
u'Type: {event_type}',
u'Return: {return_value}']
SOURCE_LONG = 'BSM entry'
SOURCE_SHORT = 'LOG'
class BSMFormatter(interface.ConditionalEventFormatter):
"""Formatter for an BSM log entry."""
DATA_TYPE = 'bsm:event'
FORMAT_STRING_PIECES = [
u'Type: {event_type}',
u'Information: {extra_tokens}']
FORMAT_STRING_SHORT_PIECES = [
u'Type: {event_type}']
SOURCE_LONG = 'BSM entry'
SOURCE_SHORT = 'LOG'
+61
View File
@@ -0,0 +1,61 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Google Chrome history."""
from plaso.formatters import interface
class ChromePageVisitedFormatter(interface.ConditionalEventFormatter):
"""The event formatter for page visited data in Chrome History."""
DATA_TYPE = 'chrome:history:page_visited'
FORMAT_STRING_PIECES = [
u'{url}',
u'({title})',
u'[count: {typed_count}]',
u'Host: {host}',
u'Visit from: {from_visit}',
u'Visit Source: [{visit_source}]',
u'{extra}']
FORMAT_STRING_SHORT_PIECES = [
u'{url}',
u'({title})']
SOURCE_LONG = 'Chrome History'
SOURCE_SHORT = 'WEBHIST'
class ChromeFileDownloadFormatter(interface.ConditionalEventFormatter):
"""The event formatter for file downloaded data in Chrome History."""
DATA_TYPE = 'chrome:history:file_downloaded'
FORMAT_STRING_PIECES = [
u'{url}',
u'({full_path}).',
u'Received: {received_bytes} bytes',
u'out of: {total_bytes} bytes.']
FORMAT_STRING_SHORT_PIECES = [
u'{full_path} downloaded',
u'({received_bytes} bytes)']
SOURCE_LONG = 'Chrome History'
SOURCE_SHORT = 'WEBHIST'
+32
View File
@@ -0,0 +1,32 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Chrome Cache files based-events."""
from plaso.formatters import interface
class ChromeCacheEntryEventFormatter(interface.ConditionalEventFormatter):
"""Class contains the Chrome Cache Entry event formatter."""
DATA_TYPE = 'chrome:cache:entry'
FORMAT_STRING_PIECES = [
u'Original URL: {original_url}']
SOURCE_LONG = 'Chrome Cache'
SOURCE_SHORT = 'WEBHIST'
+40
View File
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Google Chrome cookie."""
from plaso.formatters import interface
class ChromeCookieFormatter(interface.ConditionalEventFormatter):
"""The event formatter for cookie data in Chrome Cookies database."""
DATA_TYPE = 'chrome:cookie:entry'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Flags:',
u'[HTTP only] = {httponly}',
u'[Persistent] = {persistent}']
FORMAT_STRING_SHORT_PIECES = [
u'{host}',
u'({cookie_name})']
SOURCE_LONG = 'Chrome Cookies'
SOURCE_SHORT = 'WEBHIST'
@@ -0,0 +1,47 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Google extension activity database events."""
from plaso.formatters import interface
class ChromeExtensionActivityEventFormatter(
interface.ConditionalEventFormatter):
"""The event formatter for Chrome extension activity log entries."""
DATA_TYPE = 'chrome:extension_activity:activity_log'
FORMAT_STRING_PIECES = [
u'Chrome extension: {extension_id}',
u'Action type: {action_type}',
u'Activity identifier: {activity_id}',
u'Page URL: {page_url}',
u'Page title: {page_title}',
u'API name: {api_name}',
u'Args: {args}',
u'Other: {other}']
FORMAT_STRING_SHORT_PIECES = [
u'{extension_id}',
u'{api_name}',
u'{args}']
SOURCE_LONG = 'Chrome Extension Activity'
SOURCE_SHORT = 'WEBHIST'
# TODO: add action_type string representation.
+42
View File
@@ -0,0 +1,42 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for CUPS IPP file."""
from plaso.formatters import interface
class CupsIppFormatter(interface.ConditionalEventFormatter):
"""Formatter for CUPS IPP file."""
DATA_TYPE = 'cups:ipp:event'
FORMAT_STRING_PIECES = [
u'Status: {status}',
u'User: {user}',
u'Owner: {owner}',
u'Job Name: {job_name}',
u'Application: {application}',
u'Document type: {type_doc}',
u'Printer: {printer_id}']
FORMAT_STRING_SHORT_PIECES = [
u'Status: {status}',
u'Job Name: {job_name}']
SOURCE_LONG = 'CUPS IPP Log'
SOURCE_SHORT = 'LOG'
+66
View File
@@ -0,0 +1,66 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Stat object of a PFile."""
from plaso.lib import errors
from plaso.formatters import interface
class PfileStatFormatter(interface.ConditionalEventFormatter):
"""Define the formatting for PFileStat."""
DATA_TYPE = 'fs:stat'
FORMAT_STRING_PIECES = [u'{display_name}',
u'({unallocated})']
FORMAT_STRING_SHORT_PIECES = [u'{filename}']
SOURCE_SHORT = 'FILE'
def GetSources(self, event_object):
"""Return a list of source short and long messages."""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
self.source_string = u'{0:s} {1:s}'.format(
getattr(event_object, 'fs_type', u'Unknown FS'),
getattr(event_object, 'timestamp_desc', u'Time'))
return super(PfileStatFormatter, self).GetSources(event_object)
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
if not getattr(event_object, 'allocated', True):
event_object.unallocated = u'unallocated'
return super(PfileStatFormatter, self).GetMessages(event_object)
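Both overrides above hinge on optional event attributes. A stand-alone sketch of the getattr-with-default pattern they use (FakeEvent is a hypothetical stand-in for a plaso fs:stat event, not plaso code):

class FakeEvent(object):
  """Hypothetical stand-in for a plaso fs:stat event."""
  def __init__(self, **kwargs):
    for key, value in kwargs.items():
      setattr(self, key, value)

def BuildSourceString(event):
  # Mirrors GetSources: fall back to defaults when attributes are absent.
  return u'{0:s} {1:s}'.format(
      getattr(event, 'fs_type', u'Unknown FS'),
      getattr(event, 'timestamp_desc', u'Time'))

event = FakeEvent(fs_type=u'NTFS', allocated=False)
assert BuildSourceString(event) == u'NTFS Time'

# GetMessages only sets 'unallocated' when the entry is not allocated,
# so the '({unallocated})' piece only appears for unallocated files.
if not getattr(event, 'allocated', True):
  event.unallocated = u'unallocated'
assert getattr(event, 'unallocated', None) == u'unallocated'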
+136
View File
@@ -0,0 +1,136 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Mozilla Firefox history."""
from plaso.lib import errors
from plaso.formatters import interface
class FirefoxBookmarkAnnotationFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Firefox places.sqlite bookmark annotation."""
DATA_TYPE = 'firefox:places:bookmark_annotation'
FORMAT_STRING_PIECES = [
u'Bookmark Annotation: [{content}]',
u'to bookmark [{title}]',
u'({url})']
FORMAT_STRING_SHORT_PIECES = [u'Bookmark Annotation: {title}']
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
class FirefoxBookmarkFolderFormatter(interface.EventFormatter):
"""Formatter for a Firefox places.sqlite bookmark folder."""
DATA_TYPE = 'firefox:places:bookmark_folder'
FORMAT_STRING = u'{title}'
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
class FirefoxBookmarkFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Firefox places.sqlite URL bookmark."""
DATA_TYPE = 'firefox:places:bookmark'
FORMAT_STRING_PIECES = [
u'Bookmark {type}',
u'{title}',
u'({url})',
u'[{places_title}]',
u'visit count {visit_count}']
FORMAT_STRING_SHORT_PIECES = [
u'Bookmarked {title}',
u'({url})']
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
class FirefoxPageVisitFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Firefox places.sqlite page visited."""
DATA_TYPE = 'firefox:places:page_visited'
# Transitions defined in the Firefox source file:
# src/toolkit/components/places/nsINavHistoryService.idl
# which also explains what each of these transition types means.
_URL_TRANSITIONS = {
1: 'LINK',
2: 'TYPED',
3: 'BOOKMARK',
4: 'EMBED',
5: 'REDIRECT_PERMANENT',
6: 'REDIRECT_TEMPORARY',
7: 'DOWNLOAD',
8: 'FRAMED_LINK',
}
_URL_TRANSITIONS.setdefault('UNKNOWN')
# TODO: Make extra conditional formatting.
FORMAT_STRING_PIECES = [
u'{url}',
u'({title})',
u'[count: {visit_count}]',
u'Host: {host}',
u'{extra_string}']
FORMAT_STRING_SHORT_PIECES = [u'URL: {url}']
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
def GetMessages(self, event_object):
"""Return the message strings."""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
transition = self._URL_TRANSITIONS.get(
getattr(event_object, 'visit_type', 0), None)
if transition:
transition_str = u'Transition: {0!s}'.format(transition)
if hasattr(event_object, 'extra'):
if transition:
event_object.extra.append(transition_str)
event_object.extra_string = u' '.join(event_object.extra)
elif transition:
event_object.extra_string = transition_str
return super(FirefoxPageVisitFormatter, self).GetMessages(event_object)
class FirefoxDownloadFormatter(interface.EventFormatter):
"""Formatter for a Firefox downloads.sqlite download."""
DATA_TYPE = 'firefox:downloads:download'
FORMAT_STRING = (u'{url} ({full_path}). Received: {received_bytes} bytes '
u'out of: {total_bytes} bytes.')
FORMAT_STRING_SHORT = u'{full_path} downloaded ({received_bytes} bytes)'
SOURCE_LONG = 'Firefox History'
SOURCE_SHORT = 'WEBHIST'
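GetMessages above turns the integer visit_type column into a transition label and folds it into extra_string. A stand-alone sketch of that lookup (the event handling is simplified; the values come from the table above):

URL_TRANSITIONS = {
    1: 'LINK', 2: 'TYPED', 3: 'BOOKMARK', 4: 'EMBED',
    5: 'REDIRECT_PERMANENT', 6: 'REDIRECT_TEMPORARY',
    7: 'DOWNLOAD', 8: 'FRAMED_LINK'}

def DescribeTransition(visit_type, extra=None):
  """Returns the extra_string the formatter would produce."""
  transition = URL_TRANSITIONS.get(visit_type, None)
  parts = list(extra or [])
  if transition:
    parts.append(u'Transition: {0!s}'.format(transition))
  return u' '.join(parts)

assert DescribeTransition(2) == u'Transition: TYPED'
assert DescribeTransition(99) == u''  # Unknown types add nothing.
assert DescribeTransition(7, [u'(from Firefox)']) == (
    u'(from Firefox) Transition: DOWNLOAD')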
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Firefox cache records."""
from plaso.formatters import interface
class FirefoxCacheFormatter(interface.ConditionalEventFormatter):
"""Formatter for Firefox cache record."""
DATA_TYPE = 'firefox:cache:record'
FORMAT_STRING_PIECES = [
u'Fetched {fetch_count} time(s)',
u'[{response_code}]',
u'{request_method}',
u'"{url}"']
FORMAT_STRING_SHORT_PIECES = [
u'[{response_code}]',
u'{request_method}',
u'"{url}"']
SOURCE_LONG = 'Firefox Cache'
SOURCE_SHORT = 'WEBHIST'
+40
View File
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Firefox cookie."""
from plaso.formatters import interface
class FirefoxCookieFormatter(interface.ConditionalEventFormatter):
"""The event formatter for cookie data in Firefox Cookies database."""
DATA_TYPE = 'firefox:cookie:entry'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Flags:',
u'[HTTP only]: {httponly}',
u'(GA analysis: {ga_data})']
FORMAT_STRING_SHORT_PIECES = [
u'{host}',
u'({cookie_name})']
SOURCE_LONG = 'Firefox Cookies'
SOURCE_SHORT = 'WEBHIST'
+70
View File
@@ -0,0 +1,70 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Google Analytics cookie."""
from plaso.formatters import interface
class AnalyticsUtmaCookieFormatter(interface.ConditionalEventFormatter):
"""The event formatter for UTMA Google Analytics cookie."""
DATA_TYPE = 'cookie:google:analytics:utma'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Sessions: {sessions}',
u'Domain Hash: {domain_hash}',
u'Visitor ID: {visitor_id}']
FORMAT_STRING_SHORT_PIECES = [
u'{url}',
u'({cookie_name})']
SOURCE_LONG = 'Google Analytics Cookies'
SOURCE_SHORT = 'WEBHIST'
class AnalyticsUtmbCookieFormatter(AnalyticsUtmaCookieFormatter):
"""The event formatter for UTMB Google Analytics cookie."""
DATA_TYPE = 'cookie:google:analytics:utmb'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Pages Viewed: {pages_viewed}',
u'Domain Hash: {domain_hash}']
class AnalyticsUtmzCookieFormatter(AnalyticsUtmaCookieFormatter):
"""The event formatter for UTMZ Google Analytics cookie."""
DATA_TYPE = 'cookie:google:analytics:utmz'
FORMAT_STRING_PIECES = [
u'{url}',
u'({cookie_name})',
u'Sessions: {sessions}',
u'Domain Hash: {domain_hash}',
u'Sources: {sources}',
u'Last source used to access: {utmcsr}',
u'Ad campaign information: {utmccn}',
u'Last type of visit: {utmcmd}',
u'Keywords used to find site: {utmctr}',
u'Path to the page of referring link: {utmcct}']
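The UTMB and UTMZ formatters above subclass the UTMA formatter, overriding only DATA_TYPE and the long pieces while inheriting the short pieces and source strings. A stand-alone sketch of the pattern (the classes below are illustrative, not plaso's):

class BaseCookieFormatter(object):
  DATA_TYPE = 'cookie:example:base'
  FORMAT_STRING_PIECES = [u'{url}', u'({cookie_name})']
  SOURCE_LONG = 'Example Cookies'
  SOURCE_SHORT = 'WEBHIST'

class DerivedCookieFormatter(BaseCookieFormatter):
  # Only the identifier and the long pieces change; everything else is
  # inherited from the base class.
  DATA_TYPE = 'cookie:example:derived'
  FORMAT_STRING_PIECES = [u'{url}', u'Pages Viewed: {pages_viewed}']

assert DerivedCookieFormatter.SOURCE_LONG == 'Example Cookies'
assert DerivedCookieFormatter.DATA_TYPE == 'cookie:example:derived'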
+55
View File
@@ -0,0 +1,55 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Google Drive snaphots."""
from plaso.formatters import interface
__author__ = 'David Nides (david.nides@gmail.com)'
class GDriveCloudEntryFormatter(interface.ConditionalEventFormatter):
"""Formatter for Google Drive snapshot cloud entry."""
DATA_TYPE = 'gdrive:snapshot:cloud_entry'
FORMAT_STRING_PIECES = [
u'File Path: {path}',
u'[{shared}]',
u'Size: {size}',
u'URL: {url}',
u'Type: {document_type}']
FORMAT_STRING_SHORT_PIECES = [u'{path}']
SOURCE_LONG = 'Google Drive (cloud entry)'
SOURCE_SHORT = 'LOG'
class GDriveLocalEntryFormatter(interface.ConditionalEventFormatter):
"""Formatter for Google Drive snapshot local entry."""
DATA_TYPE = 'gdrive:snapshot:local_entry'
FORMAT_STRING_PIECES = [
u'File Path: {path}',
u'Size: {size}']
FORMAT_STRING_SHORT_PIECES = [u'{path}']
SOURCE_LONG = 'Google Drive (local entry)'
SOURCE_SHORT = 'LOG'
+57
View File
@@ -0,0 +1,57 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Hachoir events."""
from plaso.lib import errors
from plaso.formatters import interface
__author__ = 'David Nides (david.nides@gmail.com)'
class HachoirFormatter(interface.EventFormatter):
"""Formatter for Hachoir based events."""
DATA_TYPE = 'metadata:hachoir'
FORMAT_STRING = u'{data}'
SOURCE_LONG = 'Hachoir Metadata'
SOURCE_SHORT = 'META'
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
string_parts = []
for key, value in sorted(event_object.metadata.items()):
string_parts.append(u'{0:s}: {1!s}'.format(key, value))
event_object.data = u' '.join(string_parts)
return super(HachoirFormatter, self).GetMessages(event_object)
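GetMessages above flattens the metadata dictionary into one sorted 'key: value' string before delegating to the base class. A stand-alone sketch (the metadata dict is made up; the !s conversion keeps non-string values safe):

metadata = {u'producer': u'hachoir', u'author': u'jane', u'pages': 3}

string_parts = []
for key, value in sorted(metadata.items()):
  # Sorting keeps the output deterministic; !s stringifies the int.
  string_parts.append(u'{0:s}: {1!s}'.format(key, value))
data = u' '.join(string_parts)

assert data == u'author: jane pages: 3 producer: hachoir'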
+59
View File
@@ -0,0 +1,59 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Windows IIS log files."""
from plaso.formatters import interface
__author__ = 'Ashley Holtz (ashley.a.holtz@gmail.com)'
class WinIISFormatter(interface.ConditionalEventFormatter):
"""A formatter for Windows IIS log entries."""
DATA_TYPE = 'iis:log:line'
FORMAT_STRING_PIECES = [
u'{http_method}',
u'{requested_uri_stem}',
u'[',
u'{source_ip}',
u'>',
u'{dest_ip}',
u':',
u'{dest_port}',
u']',
u'Http Status: {http_status}',
u'Bytes Sent: {sent_bytes}',
u'Bytes Received: {received_bytes}',
u'User Agent: {user_agent}',
u'Protocol Version: {protocol_version}',]
FORMAT_STRING_SHORT_PIECES = [
u'{http_method}',
u'{requested_uri_stem}',
u'[',
u'{source_ip}',
u'>',
u'{dest_ip}',
u':',
u'{dest_port}',
u']',]
SOURCE_LONG = 'IIS Log'
SOURCE_SHORT = 'LOG'
+244
View File
@@ -0,0 +1,244 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the event formatters interface classes."""
import re
from plaso.lib import errors
from plaso.lib import registry
class EventFormatter(object):
"""Base class to format event type specific data using a format string.
Define the (long) format string and the short format string by defining
FORMAT_STRING and FORMAT_STRING_SHORT. The syntax of the format strings
is similar to that of format() where the placeholder for a certain
event object attribute is defined as {attribute_name}.
"""
__metaclass__ = registry.MetaclassRegistry
__abstract = True
# The data type is a unique identifier for the event data. The current
# approach is to define it as human readable string in the format
# root:branch: ... :leaf, e.g. a page visited entry inside a Chrome History
# database is defined as: chrome:history:page_visited.
DATA_TYPE = u'internal'
# The format string.
FORMAT_STRING = u''
FORMAT_STRING_SHORT = u''
# The source short and long strings.
SOURCE_SHORT = u'LOG'
SOURCE_LONG = u''
def __init__(self):
"""Set up the formatter and determine if this is the right formatter."""
# Forcing the format string to be unicode to make sure we don't
# try to format it as an ASCII string.
self.format_string = unicode(self.FORMAT_STRING)
self.format_string_short = unicode(self.FORMAT_STRING_SHORT)
self.source_string = unicode(self.SOURCE_LONG)
self.source_string_short = unicode(self.SOURCE_SHORT)
def GetMessages(self, event_object):
"""Return a list of messages extracted from an event object.
The l2t_csv and other formats are dependent on a message field,
referred to as description_long and description_short in l2t_csv.
Plaso does not store this field explicitly; it only stores a format
string and the appropriate attributes.
This method takes the format string and converts that back into a
formatted string that can be used for display.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
Raises:
WrongFormatter: if the event object cannot be formatted by the formatter.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
event_values = event_object.GetValues()
try:
msg = self.format_string.format(**event_values)
except KeyError as exception:
msgs = []
msgs.append(u'Format error: [{0:s}] for: <{1:s}>'.format(
exception, self.format_string))
for attr, value in event_object.GetValues().iteritems():
msgs.append(u'{0}: {1}'.format(attr, value))
msg = u' '.join(msgs)
# Strip carriage return and linefeed from the message strings.
# Using replace function here because it is faster
# than re.sub() or string.strip().
msg = msg.replace('\r', u'').replace('\n', u'')
if not self.format_string_short:
msg_short = msg
else:
try:
msg_short = self.format_string_short.format(**event_values)
# Using replace function here because it is faster
# than re.sub() or string.strip().
msg_short = msg_short.replace('\r', u'').replace('\n', u'')
except KeyError:
msg_short = u'Unable to format short message string: {0:s}'.format(
self.format_string_short)
# Truncate the short message string if necessary.
if len(msg_short) > 80:
msg_short = u'{0:s}...'.format(msg_short[0:77])
return msg, msg_short
def GetSources(self, event_object):
"""Return a list containing source short and long."""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
return self.source_string_short, self.source_string
class ConditionalEventFormatter(EventFormatter):
"""Base class to conditionally format event data using format string pieces.
Define the (long) format string and the short format string by defining
FORMAT_STRING_PIECES and FORMAT_STRING_SHORT_PIECES. The syntax of the
format string pieces is similar to that of the event formatter
(EventFormatter). Every format string piece should contain a single
attribute name or none.
FORMAT_STRING_SEPARATOR is used to control the string with which the
separate string pieces are joined. It contains a space by default.
"""
__abstract = True
# The format string pieces.
FORMAT_STRING_PIECES = [u'']
FORMAT_STRING_SHORT_PIECES = [u'']
# The separator used to join the string pieces.
FORMAT_STRING_SEPARATOR = u' '
def __init__(self):
"""Initializes the conditional formatter.
A map is built of the string pieces and their corresponding attribute
name to optimize conditional string formatting.
Raises:
RuntimeError: when an invalid format string piece is encountered.
"""
super(ConditionalEventFormatter, self).__init__()
# The format string can be defined as:
# {name}, {name:format}, {name!conversion}, {name!conversion:format}
regexp = re.compile('{[a-z][a-zA-Z0-9_]*[!]?[^:}]*[:]?[^}]*}')
regexp_name = re.compile('[a-z][a-zA-Z0-9_]*')
# The format string pieces map is a list containing the attribute name
# per format string piece. E.g. ["Description: {description}"] would be
# mapped to: [0] = "description". If the string piece does not contain
# an attribute name it is treated as text that does not need formatting.
self._format_string_pieces_map = []
for format_string_piece in self.FORMAT_STRING_PIECES:
result = regexp.findall(format_string_piece)
if not result:
# A text-only format string piece is stored as an empty map entry to
# keep the map indices aligned with the format string pieces.
self._format_string_pieces_map.append('')
elif len(result) == 1:
# Extract the attribute name.
attribute_name = regexp_name.findall(result[0])[0]
self._format_string_pieces_map.append(attribute_name)
else:
raise RuntimeError((
u'Invalid format string piece: [{0:s}] contains more than 1 '
u'attribute name.').format(format_string_piece))
self._format_string_short_pieces_map = []
for format_string_piece in self.FORMAT_STRING_SHORT_PIECES:
result = regexp.findall(format_string_piece)
if not result:
# A text-only format string piece is stored as an empty map entry to
# keep the map indices aligned with the format string pieces.
self._format_string_short_pieces_map.append('')
elif len(result) == 1:
# Extract the attribute name.
attribute_name = regexp_name.findall(result[0])[0]
self._format_string_short_pieces_map.append(attribute_name)
else:
raise RuntimeError((
u'Invalid short format string piece: [{0:s}] contains more '
u'than 1 attribute name.').format(format_string_piece))
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
# Using getattr here to make sure the attribute is not set to None.
# If A.b is None, hasattr(A, 'b') is True but getattr(A, 'b', None) is falsy.
string_pieces = []
for map_index, attribute_name in enumerate(self._format_string_pieces_map):
if not attribute_name or hasattr(event_object, attribute_name):
if attribute_name:
attribute = getattr(event_object, attribute_name, None)
# If an attribute is an int, yet has zero value we want to include
# that in the format string, since that is still potentially valid
# information. Otherwise we would like to skip it.
if type(attribute) not in (bool, int, long, float) and not attribute:
continue
string_pieces.append(self.FORMAT_STRING_PIECES[map_index])
self.format_string = unicode(
self.FORMAT_STRING_SEPARATOR.join(string_pieces))
string_pieces = []
for map_index, attribute_name in enumerate(
self._format_string_short_pieces_map):
if not attribute_name or getattr(event_object, attribute_name, None):
string_pieces.append(self.FORMAT_STRING_SHORT_PIECES[map_index])
self.format_string_short = unicode(
self.FORMAT_STRING_SEPARATOR.join(string_pieces))
return super(ConditionalEventFormatter, self).GetMessages(event_object)
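The GetMessages docstring above covers three behaviors worth a compact illustration: normal substitution, the KeyError fallback that dumps the event values, and the 80-character cap on the short message. A stand-alone sketch (simplified; not the plaso classes themselves):

def FormatMessage(format_string, event_values):
  """Returns the message, falling back to a dump of the values."""
  try:
    msg = format_string.format(**event_values)
  except KeyError as exception:
    parts = [u'Format error: [{0!s}] for: <{1:s}>'.format(
        exception, format_string)]
    for key, value in sorted(event_values.items()):
      parts.append(u'{0!s}: {1!s}'.format(key, value))
    msg = u' '.join(parts)
  # Strip carriage returns and linefeeds, as GetMessages does.
  return msg.replace('\r', u'').replace('\n', u'')

def TruncateShort(msg_short):
  # Short messages are capped at 80 characters with an ellipsis.
  if len(msg_short) > 80:
    return u'{0:s}...'.format(msg_short[0:77])
  return msg_short

assert FormatMessage(u'{url}', {'url': u'http://a\n'}) == u'http://a'
assert FormatMessage(u'{url}', {}).startswith(u'Format error:')
assert len(TruncateShort(u'x' * 100)) == 80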
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the iPod device events."""
from plaso.formatters import interface
class IPodDeviceFormatter(interface.ConditionalEventFormatter):
"""Formatter for iPod device events."""
DATA_TYPE = 'ipod:device:entry'
FORMAT_STRING_PIECES = [
u'Device ID: {device_id}',
u'Type: {device_class}',
u'[{family_id}]',
u'Connected {use_count} times',
u'Serial nr: {serial_number}',
u'IMEI [{imei}]']
SOURCE_LONG = 'iPod Connections'
SOURCE_SHORT = 'LOG'
+34
View File
@@ -0,0 +1,34 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Java Cache IDX events."""
from plaso.formatters import interface
class JavaIDXFormatter(interface.ConditionalEventFormatter):
"""Formatter for a Java Cache IDX download item."""
DATA_TYPE = 'java:download:idx'
SOURCE_LONG = 'Java Cache IDX'
SOURCE_SHORT = 'JAVA_IDX'
FORMAT_STRING_PIECES = [
u'IDX Version: {idx_version}',
u'Host IP address: ({ip_address})',
u'Download URL: {url}']
+36
View File
@@ -0,0 +1,36 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Mac OS X launch services quarantine events."""
from plaso.formatters import interface
class LSQuarantineFormatter(interface.ConditionalEventFormatter):
"""Formatter for a LS Quarantine history event."""
DATA_TYPE = 'macosx:lsquarantine'
FORMAT_STRING_PIECES = [
u'[{agent}]',
u'Downloaded: {url}',
u'<{data}>']
FORMAT_STRING_SHORT_PIECES = [u'{url}']
SOURCE_LONG = 'LS Quarantine Event'
SOURCE_SHORT = 'LOG'
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Mac appfirewall.log file."""
from plaso.formatters import interface
class MacAppFirewallLogFormatter(interface.ConditionalEventFormatter):
"""Formatter for Mac appfirewall.log file."""
DATA_TYPE = 'mac:asl:appfirewall:line'
FORMAT_STRING_PIECES = [
u'Computer: {computer_name}',
u'Agent: {agent}',
u'Status: {status}',
u'Process name: {process_name}',
u'Log: {action}']
FORMAT_STRING_SHORT_PIECES = [
u'Process name: {process_name}',
u'Status: {status}']
SOURCE_LONG = 'Mac AppFirewall Log'
SOURCE_SHORT = 'LOG'
+38
View File
@@ -0,0 +1,38 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for the Mac OS X Document Versions files."""
from plaso.formatters import interface
class MacDocumentVersionsFormatter(interface.ConditionalEventFormatter):
"""The event formatter for page visited data in Document Versions."""
DATA_TYPE = 'mac:document_versions:file'
FORMAT_STRING_PIECES = [
u'Version of [{name}]',
u'({path})',
u'stored in {version_path}',
u'by {user_sid}']
FORMAT_STRING_SHORT_PIECES = [
u'Stored a document version of [{name}]']
SOURCE_LONG = 'Document Versions'
SOURCE_SHORT = 'HISTORY'
+53
View File
@@ -0,0 +1,53 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Keychain password database file."""
from plaso.formatters import interface
class KeychainApplicationRecordFormatter(interface.ConditionalEventFormatter):
"""Formatter for keychain application record event."""
DATA_TYPE = 'mac:keychain:application'
FORMAT_STRING_PIECES = [
u'Name: {entry_name}',
u'Account: {account_name}']
FORMAT_STRING_SHORT_PIECES = [u'{entry_name}']
SOURCE_LONG = 'Keychain Application password'
SOURCE_SHORT = 'LOG'
class KeychainInternetRecordFormatter(interface.ConditionalEventFormatter):
"""Formatter for keychain internet record event."""
DATA_TYPE = 'mac:keychain:internet'
FORMAT_STRING_PIECES = [
u'Name: {entry_name}',
u'Account: {account_name}',
u'Where: {where}',
u'Protocol: {protocol}',
u'({type_protocol})']
FORMAT_STRING_SHORT_PIECES = [u'{entry_name}']
SOURCE_LONG = 'Keychain Internet password'
SOURCE_SHORT = 'LOG'
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for ASL securityd log file."""
from plaso.formatters import interface
class MacSecuritydLogFormatter(interface.ConditionalEventFormatter):
"""Formatter for ASL Securityd file."""
DATA_TYPE = 'mac:asl:securityd:line'
FORMAT_STRING_PIECES = [
u'Sender: {sender}',
u'({sender_pid})',
u'Level: {level}',
u'Facility: {facility}',
u'Text: {message}']
FORMAT_STRING_SHORT_PIECES = [u'Text: {message}']
SOURCE_LONG = 'Mac ASL Securityd Log'
SOURCE_SHORT = 'LOG'
+38
View File
@@ -0,0 +1,38 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Mac wifi.log file."""
from plaso.formatters import interface
class MacWifiLogFormatter(interface.ConditionalEventFormatter):
"""Formatter for Mac Wifi file."""
DATA_TYPE = 'mac:wifilog:line'
FORMAT_STRING_PIECES = [
u'Action: {action}',
u'Agent: {user}',
u'({function})',
u'Log: {text}']
FORMAT_STRING_SHORT_PIECES = [
u'Action: {action}']
SOURCE_LONG = 'Mac Wifi Log'
SOURCE_SHORT = 'LOG'
+35
View File
@@ -0,0 +1,35 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a MacKeepr Cache formatter in plaso."""
from plaso.formatters import interface
class MacKeeperCacheFormatter(interface.ConditionalEventFormatter):
"""Formatter for MacKeeper Cache extracted events."""
DATA_TYPE = 'mackeeper:cache'
FORMAT_STRING_PIECES = [
u'{description}', u'<{event_type}>', u':', u'{text}', u'[',
u'URL: {url}', u'Event ID: {record_id}', u'Room: {room}', u']']
FORMAT_STRING_SHORT_PIECES = [u'<{event_type}>', u'{text}']
SOURCE_LONG = 'MacKeeper Cache'
SOURCE_SHORT = 'LOG'
+32
View File
@@ -0,0 +1,32 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Sleuthkit (TSK) bodyfile or mactime format."""
from plaso.formatters import interface
class MactimeFormatter(interface.EventFormatter):
"""Class that formats mactime bodyfile events."""
DATA_TYPE = 'fs:mactime:line'
# The format string.
FORMAT_STRING = u'{filename}'
SOURCE_LONG = 'Mactime Bodyfile'
SOURCE_SHORT = 'FILE'
+140
View File
@@ -0,0 +1,140 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the event formatters manager class."""
import logging
from plaso.formatters import interface
from plaso.lib import utils
class DefaultFormatter(interface.EventFormatter):
"""Default formatter for events that do not have any defined formatter."""
DATA_TYPE = u'event'
FORMAT_STRING = u'<WARNING DEFAULT FORMATTER> Attributes: {attribute_driven}'
FORMAT_STRING_SHORT = u'<DEFAULT> {attribute_driven}'
def GetMessages(self, event_object):
"""Return a list of messages extracted from an event object."""
text_pieces = []
for key, value in event_object.GetValues().items():
if key in utils.RESERVED_VARIABLES:
continue
text_pieces.append(u'{0:s}: {1!s}'.format(key, value))
event_object.attribute_driven = u' '.join(text_pieces)
# The default formatter requires the data_type to be set to 'event',
# otherwise it will complain and refuse to process the event.
# TODO: Change this behavior and allow the default formatter to accept
# arbitrary data types (as it should).
old_data_type = getattr(event_object, 'data_type', None)
event_object.data_type = self.DATA_TYPE
msg, msg_short = super(DefaultFormatter, self).GetMessages(event_object)
event_object.data_type = old_data_type
return msg, msg_short
class EventFormatterManager(object):
"""Class to manage the event formatters."""
@classmethod
def GetFormatter(cls, event_object):
"""Retrieves the formatter for a specific event object.
This function builds a map of data types and the corresponding event
formatters. At the moment this map is only built once.
Args:
event_object: The event object (EventObject) which is used to identify
the formatter.
Returns:
The corresponding formatter (EventFormatter) if available or None.
Raises:
RuntimeError: if a duplicate event formatter is found while building
the map of event formatters.
"""
if not hasattr(cls, 'event_formatters'):
cls.event_formatters = {}
cls.default_formatter = DefaultFormatter()
for cls_formatter in interface.EventFormatter.classes:
try:
formatter = interface.EventFormatter.classes[cls_formatter]()
# Raise on duplicate formatters.
if formatter.DATA_TYPE in cls.event_formatters:
raise RuntimeError((
u'event formatter for data type: {0:s} defined in: {1:s} and '
u'{2:s}.').format(
formatter.DATA_TYPE, cls_formatter,
cls.event_formatters[
formatter.DATA_TYPE].__class__.__name__))
cls.event_formatters[formatter.DATA_TYPE] = formatter
except RuntimeError as exception:
# Ignore broken formatters.
logging.warning(u'{0:s}'.format(exception))
cls.event_formatters.setdefault(None)
if event_object.data_type in cls.event_formatters:
return cls.event_formatters[event_object.data_type]
else:
logging.warning(
u'Using default formatter for data type: {0:s}'.format(
event_object.data_type))
return cls.default_formatter
@classmethod
def GetMessageStrings(cls, event_object):
"""Retrieves the formatted message strings for a specific event object.
Args:
event_object: The event object (EventObject) which is used to identify
the formatter.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
formatter = cls.GetFormatter(event_object)
if not formatter:
return u'', u''
return formatter.GetMessages(event_object)
@classmethod
def GetSourceStrings(cls, event_object):
"""Retrieves the formatted source long and short strings for an event.
Args:
event_object: The event object (EventObject) which is used to identify
the formatter.
Returns:
A list that contains the source_short and source_long version of the
event.
"""
# TODO: change this to return the long variant first so it is consistent
# with GetMessageStrings.
formatter = cls.GetFormatter(event_object)
if not formatter:
return u'', u''
return formatter.GetSources(event_object)
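GetFormatter above lazily builds a data_type to formatter map, raises on duplicates and falls back to the default formatter for unknown types. A stand-alone sketch of that dispatch (simplified; no metaclass registry):

class _Formatter(object):
  def __init__(self, data_type):
    self.DATA_TYPE = data_type

_DEFAULT = _Formatter(u'event')
_FORMATTERS = {}

def RegisterFormatter(formatter):
  if formatter.DATA_TYPE in _FORMATTERS:
    # Mirrors the RuntimeError raised on duplicate data types.
    raise RuntimeError(
        u'event formatter for data type: {0:s} already defined.'.format(
            formatter.DATA_TYPE))
  _FORMATTERS[formatter.DATA_TYPE] = formatter

def GetFormatter(data_type):
  # Unknown data types fall back to the default formatter.
  return _FORMATTERS.get(data_type, _DEFAULT)

RegisterFormatter(_Formatter(u'fs:stat'))
assert GetFormatter(u'fs:stat').DATA_TYPE == u'fs:stat'
assert GetFormatter(u'no:such:type') is _DEFAULT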
+163
View File
@@ -0,0 +1,163 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a unit test for the event formatters."""
import unittest
from plaso.formatters import interface
from plaso.formatters import manager
from plaso.formatters import winreg # pylint: disable=unused-import
from plaso.lib import event_test
class TestEvent1Formatter(interface.EventFormatter):
"""Test event 1 formatter."""
DATA_TYPE = 'test:event1'
FORMAT_STRING = u'{text}'
SOURCE_SHORT = 'FILE'
SOURCE_LONG = 'Weird Log File'
class WrongEventFormatter(interface.EventFormatter):
"""A simple event formatter."""
DATA_TYPE = 'test:wrong'
FORMAT_STRING = u'This format string does not match {body}.'
SOURCE_SHORT = 'FILE'
SOURCE_LONG = 'Weird Log File'
class EventFormatterUnitTest(unittest.TestCase):
"""The unit test for the event formatter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self._formatters_manager = manager.EventFormatterManager
self.event_objects = event_test.GetEventObjects()
def GetCSVLine(self, event_object):
"""Takes an EventObject and prints out a simple CSV line from it."""
try:
msg, _ = self._formatters_manager.GetMessageStrings(event_object)
source_short, source_long = self._formatters_manager.GetSourceStrings(
event_object)
except KeyError:
print event_object.GetAttributes()
raise
return u'{0:d},{1:s},{2:s},{3:s}'.format(
event_object.timestamp, source_short, source_long, msg)
def testInitialization(self):
"""Test the initialization."""
self.assertTrue(TestEvent1Formatter())
def testAttributes(self):
"""Test if we can read the event attributes correctly."""
events = {}
for event_object in self.event_objects:
events[self.GetCSVLine(event_object)] = True
self.assertIn((
u'1334961526929596,REG,UNKNOWN key,[MY AutoRun key] Run: '
u'c:/Temp/evil.exe'), events)
self.assertIn(
(u'1334966206929596,REG,UNKNOWN key,[//HKCU/Secret/EvilEmpire/'
u'Malicious_key] Value: send all the exes to the other '
u'world'), events)
self.assertIn((u'1334940286000000,REG,UNKNOWN key,[//HKCU/Windows'
u'/Normal] Value: run all the benign stuff'), events)
self.assertIn((u'1335781787929596,FILE,Weird Log File,This log line reads '
u'ohh so much.'), events)
self.assertIn((u'1335781787929596,FILE,Weird Log File,Nothing of interest'
u' here, move on.'), events)
self.assertIn((u'1335791207939596,FILE,Weird Log File,Mr. Evil just logged'
u' into the machine and got root.'), events)
def testTextBasedEvent(self):
"""Test a text based event."""
for event_object in self.event_objects:
source_short, _ = self._formatters_manager.GetSourceStrings(event_object)
if source_short == 'LOG':
msg, msg_short = self._formatters_manager.GetMessageStrings(
event_object)
self.assertEquals(msg, (
u'This is a line by someone not reading the log line properly. And '
u'since this log line exceeds the accepted 80 chars it will be '
u'shortened.'))
self.assertEquals(msg_short, (
u'This is a line by someone not reading the log line properly. '
u'And since this l...'))
class ConditionalTestEvent1(event_test.TestEvent1):
DATA_TYPE = 'test:conditional_event1'
class ConditionalTestEvent1Formatter(interface.ConditionalEventFormatter):
"""Test event 1 conditional (event) formatter."""
DATA_TYPE = 'test:conditional_event1'
FORMAT_STRING_PIECES = [
u'Description: {description}',
u'Comment',
u'Value: 0x{numeric:02x}',
u'Optional: {optional}',
u'Text: {text}']
SOURCE_SHORT = 'LOG'
SOURCE_LONG = 'Some Text File.'
class BrokenConditionalEventFormatter(interface.ConditionalEventFormatter):
"""A broken conditional event formatter."""
DATA_TYPE = 'test:broken_conditional'
FORMAT_STRING_PIECES = [u'{too} {many} formatting placeholders']
SOURCE_SHORT = 'LOG'
SOURCE_LONG = 'Some Text File.'
class ConditionalEventFormatterUnitTest(unittest.TestCase):
"""The unit test for the conditional event formatter."""
def setUp(self):
"""Sets up the needed objects used throughout the test."""
self.event_object = ConditionalTestEvent1(1335791207939596, {
'numeric': 12, 'description': 'this is beyond words',
'text': 'but we\'re still trying to say something about the event'})
def testInitialization(self):
"""Test the initialization."""
self.assertTrue(ConditionalTestEvent1Formatter())
with self.assertRaises(RuntimeError):
BrokenConditionalEventFormatter()
def testGetMessages(self):
"""Test get messages."""
event_formatter = ConditionalTestEvent1Formatter()
msg, _ = event_formatter.GetMessages(self.event_object)
expected_msg = (
u'Description: this is beyond words Comment Value: 0x0c '
u'Text: but we\'re still trying to say something about the event')
self.assertEquals(msg, expected_msg)
if __name__ == '__main__':
unittest.main()
+34
View File
@@ -0,0 +1,34 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the McAfee AV Logs files."""
from plaso.formatters import interface
class McafeeAccessProtectionLogEventFormatter(interface.EventFormatter):
"""Class that formats the McAfee Access Protection Log events."""
DATA_TYPE = 'av:mcafee:accessprotectionlog'
# The format string.
FORMAT_STRING = (u'File Name: {filename} User: {username} {trigger_location} '
u'{status} {rule} {action}')
FORMAT_STRING_SHORT = u'{filename} {action}'
SOURCE_LONG = 'McAfee Access Protection Log'
SOURCE_SHORT = 'LOG'
+99
View File
@@ -0,0 +1,99 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatters for the MSIE WebCache ESE database events."""
from plaso.formatters import interface
class MsieWebCacheContainerEventFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIE WebCache ESE database Container_# table record."""
DATA_TYPE = 'msie:webcache:container'
FORMAT_STRING_PIECES = [
u'Entry identifier: {entry_identifier}',
u'Container identifier: {container_identifier}',
u'Cache identifier: {cache_identifier}',
u'URL: {url}',
u'Redirect URL: {redirect_url}',
u'Access count: {access_count}',
u'Sync count: {sync_count}',
u'Filename: {cached_filename}',
u'File extension: {file_extension}',
u'Cached file size: {cached_file_size}',
u'Request headers: {request_headers}',
u'Response headers: {response_headers}']
FORMAT_STRING_SHORT_PIECES = [
u'URL: {url}']
SOURCE_LONG = 'MSIE WebCache container record'
SOURCE_SHORT = 'WEBHIST'
class MsieWebCacheContainersEventFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIE WebCache ESE database Containers table record."""
DATA_TYPE = 'msie:webcache:containers'
FORMAT_STRING_PIECES = [
u'Container identifier: {container_identifier}',
u'Set identifier: {set_identifier}',
u'Name: {name}',
u'Directory: {directory}',
u'Table: Container_{container_identifier}']
FORMAT_STRING_SHORT_PIECES = [
u'Directory: {directory}']
SOURCE_LONG = 'MSIE WebCache containers record'
SOURCE_SHORT = 'WEBHIST'
class MsieWebCacheLeakFilesEventFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIE WebCache ESE database LeakFiles table record."""
DATA_TYPE = 'msie:webcache:leak_file'
FORMAT_STRING_PIECES = [
u'Leak identifier: {leak_identifier}',
u'Filename: {cached_filename}']
FORMAT_STRING_SHORT_PIECES = [
u'Filename: {cached_filename}']
SOURCE_LONG = 'MSIE WebCache leak files record'
SOURCE_SHORT = 'WEBHIST'
class MsieWebCachePartitionsEventFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIE WebCache ESE database Partitions table record."""
DATA_TYPE = 'msie:webcache:partitions'
FORMAT_STRING_PIECES = [
u'Partition identifier: {partition_identifier}',
u'Partition type: {partition_type}',
u'Directory: {directory}',
u'Table identifier: {table_identifier}']
FORMAT_STRING_SHORT_PIECES = [
u'Directory: {directory}']
SOURCE_LONG = 'MSIE WebCache partitions record'
SOURCE_SHORT = 'WEBHIST'
+65
View File
@@ -0,0 +1,65 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Microsoft Internet Explorer (MSIE) Cache Files (CF) events."""
from plaso.lib import errors
from plaso.formatters import interface
class MsiecfUrlFormatter(interface.ConditionalEventFormatter):
"""Formatter for a MSIECF URL item."""
DATA_TYPE = 'msiecf:url'
FORMAT_STRING_PIECES = [
u'Location: {url}',
u'Number of hits: {number_of_hits}',
u'Cached file size: {cached_file_size}',
u'HTTP headers: {http_headers_cleaned}',
u'{recovered_string}']
FORMAT_STRING_SHORT_PIECES = [
u'Location: {url}']
SOURCE_LONG = 'MSIE Cache File URL record'
SOURCE_SHORT = 'WEBHIST'
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
if hasattr(event_object, 'http_headers'):
event_object.http_headers_cleaned = event_object.http_headers.replace(
'\r\n', ' - ')
# TODO: Could this be moved upstream since this is done in other parsers
# as well?
if getattr(event_object, 'recovered', None):
event_object.recovered_string = u'[Recovered Entry]'
return super(MsiecfUrlFormatter, self).GetMessages(event_object)
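The override above derives two display attributes before formatting: multi-line HTTP headers are flattened onto one line and recovered items get a marker. A stand-alone sketch (the header string is made up):

http_headers = u'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n'

# Flatten the multi-line headers the way the formatter does.
http_headers_cleaned = http_headers.replace('\r\n', ' - ')
assert http_headers_cleaned == (
    u'HTTP/1.1 200 OK - Content-Type: text/html - ')

# Recovered (deleted but carved) entries get an explicit marker.
recovered = True
recovered_string = u'[Recovered Entry]' if recovered else u''
assert recovered_string == u'[Recovered Entry]'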
+149
View File
@@ -0,0 +1,149 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatters for OLE Compound File (OLECF) events."""
from plaso.formatters import interface
from plaso.lib import errors
class OleCfItemFormatter(interface.EventFormatter):
"""Formatter for an OLECF item."""
DATA_TYPE = 'olecf:item'
FORMAT_STRING = u'Name: {name}'
FORMAT_STRING_SHORT = u'Name: {name}'
SOURCE_LONG = 'OLECF Item'
SOURCE_SHORT = 'OLECF'
class OleCfDestListEntryFormatter(interface.ConditionalEventFormatter):
"""Formatter for an OLECF DestList stream."""
DATA_TYPE = 'olecf:dest_list:entry'
FORMAT_STRING_PIECES = [
u'Entry: {entry_number}',
u'Pin status: {pin_status_string}',
u'Hostname: {hostname}',
u'Path: {path}',
u'Droid volume identifier: {droid_volume_identifier}',
u'Droid file identifier: {droid_file_identifier}',
u'Birth droid volume identifier: {birth_droid_volume_identifier}',
u'Birth droid file identifier: {birth_droid_file_identifier}']
FORMAT_STRING_SHORT_PIECES = [
u'Entry: {entry_number}',
u'Pin status: {pin_status_string}',
u'Path: {path}']
def GetMessages(self, event_object):
"""Returns a list of messages extracted from an event object.
Args:
event_object: The event object (EventObject) containing the event
specific data.
Returns:
A list that contains both the longer and shorter version of the message
string.
"""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
pin_status = getattr(event_object, 'pin_status', None)
if pin_status == 0xffffffff:
event_object.pin_status_string = u'Unpinned'
else:
event_object.pin_status_string = u'Pinned'
return super(OleCfDestListEntryFormatter, self).GetMessages(event_object)
class OleCfDocumentSummaryInfoFormatter(interface.ConditionalEventFormatter):
"""Formatter for an OLECF Summary Info property set stream."""
DATA_TYPE = 'olecf:document_summary_info'
FORMAT_STRING_PIECES = [
u'Number of bytes: {number_of_bytes}',
u'Number of lines: {number_of_lines}',
u'Number of paragraphs: {number_of_paragraphs}',
u'Number of slides: {number_of_slides}',
u'Number of notes: {number_of_notes}',
u'Number of hidden slides: {number_of_hidden_slides}',
u'Number of multi-media clips: {number_of_clips}',
u'Company: {company}',
u'Manager: {manager}',
u'Shared document: {shared_document}',
u'Application version: {application_version}',
u'Content type: {content_type}',
u'Content status: {content_status}',
u'Language: {language}',
u'Document version: {document_version}']
# TODO: add support for the following properties.
# u'Digital signature: {digital_signature}',
FORMAT_STRING_SHORT_PIECES = [
u'Company: {company}']
SOURCE_LONG = 'OLECF Document Summary Info'
SOURCE_SHORT = 'OLECF'
class OleCfSummaryInfoFormatter(interface.ConditionalEventFormatter):
"""Formatter for an OLECF Summary Info property set stream."""
DATA_TYPE = 'olecf:summary_info'
FORMAT_STRING_PIECES = [
u'Title: {title}',
u'Subject: {subject}',
u'Author: {author}',
u'Keywords: {keywords}',
u'Comments: {comments}',
u'Template: {template}',
u'Revision number: {revision_number}',
u'Last saved by: {last_saved_by}',
u'Total edit time: {total_edit_time}',
u'Number of pages: {number_of_pages}',
u'Number of words: {number_of_words}',
u'Number of characters: {number_of_characters}',
u'Application: {application}',
u'Security: {security}']
FORMAT_STRING_SHORT_PIECES = [
u'Title: {title}',
u'Subject: {subject}',
u'Author: {author}',
u'Revision number: {revision_number}']
SOURCE_LONG = 'OLECF Summary Info'
SOURCE_SHORT = 'OLECF'
# TODO: add a function to print the security as a descriptive string.
_SECURITY_VALUES = {
0x00000001: 'Password protected',
0x00000002: 'Read-only recommended',
0x00000004: 'Read-only enforced',
0x00000008: 'Locked for annotations',
}
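# A minimal sketch of the helper the TODO above asks for, assuming the
# security property is a bitmask of the _SECURITY_VALUES flags; this helper
# is hypothetical and not part of the original module.
def _GetSecurityString(security_value):
  """Returns a descriptive string for an OLECF security bitmask."""
  descriptions = [
      description for bitmask, description in sorted(
          OleCfSummaryInfoFormatter._SECURITY_VALUES.items())
      if bitmask & security_value]
  if not descriptions:
    return u'None'
  return u', '.join(descriptions)
# For example, _GetSecurityString(0x00000003) returns
# u'Password protected, Read-only recommended'.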
+47
View File
@@ -0,0 +1,47 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for Opera history events."""
from plaso.formatters import interface
class OperaGlobalHistoryFormatter(interface.ConditionalEventFormatter):
"""Formatter for an Opera global history event."""
DATA_TYPE = 'opera:history:entry'
FORMAT_STRING_PIECES = [
u'{url}',
u'({title})',
u'[{description}]']
SOURCE_LONG = 'Opera Browser History'
SOURCE_SHORT = 'WEBHIST'
class OperaTypedHistoryFormatter(interface.ConditionalEventFormatter):
"""Formatter for an Opera typed history event."""
DATA_TYPE = 'opera:history:typed_entry'
FORMAT_STRING_PIECES = [
u'{url}',
u'({entry_selection})']
SOURCE_LONG = 'Opera Browser History'
SOURCE_SHORT = 'WEBHIST'
+67
View File
@@ -0,0 +1,67 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for OpenXML events."""
from plaso.formatters import interface
__author__ = 'David Nides (david.nides@gmail.com)'
class OpenXMLParserFormatter(interface.ConditionalEventFormatter):
"""Formatter for OXML events."""
DATA_TYPE = 'metadata:openxml'
FORMAT_STRING_PIECES = [
u'Creating App: {creating_app}',
u'App version: {app_version}',
u'Title: {title}',
u'Subject: {subject}',
u'Last saved by: {last_saved_by}',
u'Author: {author}',
u'Total edit time (secs): {total_edit_time}',
u'Keywords: {keywords}',
u'Comments: {comments}',
u'Revision Num: {revision_num}',
u'Template: {template}',
u'Num pages: {num_pages}',
u'Num words: {num_words}',
u'Num chars: {num_chars}',
u'Num chars with spaces: {num_chars_w_spaces}',
u'Num lines: {num_lines}',
u'Company: {company}',
u'Manager: {manager}',
u'Shared: {shared}',
u'Security: {security}',
u'Hyperlinks changed: {hyperlinks_changed}',
u'Links up to date: {links_up_to_date}',
u'Scale crop: {scale_crop}',
u'Digital signature: {dig_sig}',
u'Slides: {slides}',
u'Hidden slides: {hidden_slides}',
u'Presentation format: {presentation_format}',
u'MM clips: {mm_clips}',
u'Notes: {notes}']
FORMAT_STRING_SHORT_PIECES = [
u'Title: {title}',
u'Subject: {subject}',
u'Author: {author}']
SOURCE_LONG = 'Open XML Metadata'
SOURCE_SHORT = 'META'
+50
View File
@@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for PCAP files."""
from plaso.formatters import interface
__author__ = 'Dominique Kilman (lexistar97@gmail.com)'
class PCAPFormatter(interface.ConditionalEventFormatter):
"""Define the formatting PCAP record."""
DATA_TYPE = 'metadata:pcap'
FORMAT_STRING_PIECES = [
u'Source IP: {source_ip}',
u'Destination IP: {dest_ip}',
u'Source Port: {source_port}',
u'Destination Port: {dest_port}',
u'Protocol: {protocol}',
u'Type: {stream_type}',
u'Size: {size}',
u'Protocol Data: {protocol_data}',
u'Stream Data: {stream_data}',
u'First Packet ID: {first_packet_id}',
u'Last Packet ID: {last_packet_id}',
u'Packet Count: {packet_count}']
FORMAT_STRING_SHORT_PIECES = [
u'Type: {stream_type}',
u'First Packet ID: {first_packet_id}']
SOURCE_LONG = 'Packet Capture File (pcap)'
SOURCE_SHORT = 'PCAP'
+36
View File
@@ -0,0 +1,36 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a formatter for Plist Events."""
from plaso.formatters import interface
class PlistFormatter(interface.ConditionalEventFormatter):
"""Event Formatter for plist keys."""
DATA_TYPE = 'plist:key'
FORMAT_STRING_SEPARATOR = u''
FORMAT_STRING_PIECES = [
u'{root}/',
u'{key}',
u' {desc}']
SOURCE_LONG = 'Plist Entry'
SOURCE_SHORT = 'PLIST'
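# Example rendering under assumed attribute values: with root=u'/DeviceCache',
# key=u'Entry' and desc=u'Paired device', the empty FORMAT_STRING_SEPARATOR
# joins the pieces above into u'/DeviceCache/Entry Paired device'.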
+33
View File
@@ -0,0 +1,33 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for PL-Sql Recall events."""
from plaso.formatters import interface
class PlsRecallFormatter(interface.EventFormatter):
"""Formatter for a for a PL-Sql Recall file container."""
DATA_TYPE = 'PLSRecall:event'
SOURCE_LONG = 'PL-Sql Developer Recall file'
SOURCE_SHORT = 'PLSRecall'
# The format string.
FORMAT_STRING = (u'Sequence #{sequence} User: {username} '
u'Database Name: {database_name} Query: {query}')
FORMAT_STRING_SHORT = u'{sequence} {username} {database_name} {query}'
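# Example rendering with assumed values (sequence=42, username=u'SCOTT',
# database_name=u'ORCL', query=u'SELECT 1 FROM DUAL'):
#   Sequence #42 User: SCOTT Database Name: ORCL Query: SELECT 1 FROM DUAL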
+55
View File
@@ -0,0 +1,55 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Popularity Contest parser events."""
from plaso.formatters import interface
class PopularityContestSessionFormatter(interface.ConditionalEventFormatter):
"""Formatter for Popularity Contest Session information."""
DATA_TYPE = 'popularity_contest:session:event'
FORMAT_STRING_PIECES = [
u'Session {session}',
u'{status}',
u'ID {hostid}',
u'[{details}]']
FORMAT_STRING_SHORT_PIECES = [
u'Session {session}',
u'{status}']
SOURCE_LONG = 'Popularity Contest Session'
SOURCE_SHORT = 'LOG'
class PopularityContestLogFormatter(interface.ConditionalEventFormatter):
"""Formatter for Popularity Contest Log events."""
DATA_TYPE = 'popularity_contest:log:event'
FORMAT_STRING_PIECES = [
u'mru [{mru}]',
u'package [{package}]',
u'tag [{record_tag}]']
FORMAT_STRING_SHORT_PIECES = [u'{mru}']
SOURCE_LONG = 'Popularity Contest Log'
SOURCE_SHORT = 'LOG'
+82
View File
@@ -0,0 +1,82 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Formatter for the Windows recycle files."""
from plaso.lib import errors
from plaso.formatters import interface
class WinRecyclerFormatter(interface.ConditionalEventFormatter):
"""Formatter for Windows recycle bin events."""
DATA_TYPE = 'windows:metadata:deleted_item'
DRIVE_LIST = {
0x00: 'A',
0x01: 'B',
0x02: 'C',
0x03: 'D',
0x04: 'E',
0x05: 'F',
0x06: 'G',
0x07: 'H',
0x08: 'I',
0x09: 'J',
0x0A: 'K',
0x0B: 'L',
0x0C: 'M',
0x0D: 'N',
0x0E: 'O',
0x0F: 'P',
0x10: 'Q',
0x11: 'R',
0x12: 'S',
0x13: 'T',
0x14: 'U',
0x15: 'V',
0x16: 'W',
0x17: 'X',
0x18: 'Y',
0x19: 'Z',
}
# The format string.
FORMAT_STRING_PIECES = [
u'DC{index} ->',
u'{orig_filename}',
u'[{orig_filename_legacy}]',
u'(from drive {drive_letter})']
FORMAT_STRING_SHORT_PIECES = [
u'Deleted file: {orig_filename}']
SOURCE_LONG = 'Recycle Bin'
SOURCE_SHORT = 'RECBIN'
def GetMessages(self, event_object):
"""Return the message strings."""
if self.DATA_TYPE != event_object.data_type:
raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
event_object.data_type))
if hasattr(event_object, 'drive_number'):
event_object.drive_letter = self.DRIVE_LIST.get(
event_object.drive_number, 'C?')
return super(WinRecyclerFormatter, self).GetMessages(event_object)
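# Illustrative sanity check of the mapping above; these asserts mirror the
# class constants and are not part of the original module.
if __name__ == '__main__':
  assert WinRecyclerFormatter.DRIVE_LIST.get(0x02, 'C?') == 'C'
  assert WinRecyclerFormatter.DRIVE_LIST.get(0x1A, 'C?') == 'C?'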
+422
View File
@@ -0,0 +1,422 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains formatters for the parsed Rubanetra events. Additionally, a Java Instant formatter was defined
as well."""
from plaso.formatters import interface
__author__ = 'Stefan Swerk (stefan_rubanetra@swerk.priv.at)'
class RubanetraBaseActivityFormatter(interface.ConditionalEventFormatter):
""" Formatter for a Rubanetra BaseActivity """
DATA_TYPE = 'java:rubanetra:base_activity'
SOURCE_SHORT = 'LOG'
SOURCE_LONG = 'at.jku.fim.rubanetra.BaseActivity'
FORMAT_STRING_PIECES = [
u'activityType: \'{activity_type}\'',
u'firstTimestamp: \'{first_timestamp}\'',
u'lastTimestamp: \'{last_timestamp}\'',
u'description: \'{description}\'',
u'sourceAddress: \'{source_address}\'',
u'destinationAddress: \'{destination_address}\'',
u'compoundFrameNumbers: \'{compound_frame_number_list}\'',
u'isReplaced: \'{replaced}\'',
u'optionalFields: \'{optional_field_dict}\'']
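# The subclasses below extend these base pieces by list concatenation, so
# every activity type repeats the common fields before adding its
# protocol-specific ones.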
class RubanetraPcapActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:pcap_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.PcapActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES \
+ [u'totalSize: \'{pcap_total_size}\'',
u'frameNumber: \'{pcap_frame_number}\'',
u'wireLength: \'{pcap_packet_wirelen}\'',
u'headerCount: \'{pcap_header_count}\'']
class RubanetraHttpRequestActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:http_request_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.HttpRequestActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'serverAddress: \'{server_address}\'',
u'clientAddress: \'{client_address}\'',
u'httpVersion: \'{http_version}\'',
u'httpMethod: \'{http_method}\'',
u'httpQueryString: \'{http_query_string}\'',
u'httpQueryParameters: \'{http_query_parameters}\'',
u'httpRequestHeader: \'{http_request_header_dict}\'',
u'url: \'{url}\'',
u'originalHttpHeader: \'{orig_http_header}\'',
u'contentType: \'{content_type}\'',
u'isResponse: \'{is_response}\'',
u'JNetPcapHttpString: \'{jnetpcap_http_string}\'']
class RubanetraHttpResponseActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:http_response_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.HttpResponseActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'httpVersion: \'{http_version}\'',
u'httpStatusCode: \'{response_status_code}\'',
u'httpStatusLine: \'{response_status_line}\'',
u'httpResponseHeader: \'{response_header_dict}\'',
u'originalHttpHeader: \'{orig_http_header}\'',
u'contentType: \'{content_type}\'',
u'JNetPcapHttpString: \'{jnetpcap_http_string}\'']
class RubanetraDnsActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:dns_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.DnsActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'questionRecords: \'{question_record_list}\'',
u'answerRecords: \'{answer_record_list}\'',
u'authorityRecords: \'{authority_record_list}\'',
u'additionalRecords: \'{additional_record_list}\'',
u'dnsMessageHeader: \'{dns_message_header}\'',
u'isResponse: \'{is_response_bool}\'']
class RubanetraHttpImageActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:http_image_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.HttpImageActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'imageType: \'{image_type}\'',
u'imagePath: \'{image_path}\'']
class RubanetraArpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:arp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.ArpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'hardwareType: \'{hardware_type}\'',
u'protocolType: \'{protocol_type}\'',
u'hardwareAddressLength: \'{hardware_address_length}\'',
u'protocolAddressLength: \'{protocol_address_length}\'',
u'senderHardwareAddress: \'{sender_mac_address}\'',
u'targetHardwareAddress: \'{target_mac_address}\'',
u'senderProtocolAddress: \'{sender_protocol_address}\'',
u'targetProtocolAddress: \'{target_protocol_address}\'',
u'JNetPcapArpString: \'{jnetpcap_arp}\'']
class RubanetraDhcpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:dhcp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.DhcpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'dhcpMessage: \'{dhcp_message}\'']
class RubanetraEthernetActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ethernet_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.EthernetActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'sourceMacAddress: \'{source_mac_address}\'',
u'destinationMacAddress: \'{destination_mac_address}\'',
u'ethernetType: \'{ethernet_type}\'',
u'ethernetTypeEnum: \'{ethernet_type_enum}\'',
u'JNetPcapEthernetString: \'{jnetpcap_ethernet}\'']
class RubanetraFtpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ftp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.FtpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'ftpActivityType: \'{ftp_type}\'',
u'command: \'{command}\'',
u'reply: \'{reply}\'',
u'list: \'{list}\'']
class RubanetraIcmpv4ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:icmpv4_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Icmpv4Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'icmpSubType: \'{icmp_subtype}\'',
u'icmpPacket: \'{icmp_packet}\'',
u'icmpMessage: \'{icmp_message}\'',
u'icmpType: \'{icmp_type}\'',
u'icmpCode: \'{icmp_code}\'',
u'sourceAddress: \'{source_address}\'',
u'destinationAddress: \'{destination_address}\'',
u'identifier: \'{identifier}\'',
u'sequence: \'{sequence}\'',
u'JNetPcapIcmpString: \'{jnetpcap_icmp}\'']
class RubanetraIcmpv6ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:icmpv6_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Icmpv6Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'icmpSubType: \'{icmp_subtype}\'',
u'icmpPacket: \'{icmp_packet}\'',
u'icmpMessage: \'{icmp_message}\'',
u'icmpType: \'{icmp_type}\'',
u'JNetPcapIcmpString: \'{jnetpcap_icmp}\'']
class RubanetraIpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ip_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.IpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'version: \'{version}\'',
u'protocol: \'{protocol}\'',
u'sourceAddress: \'{source_address}\'',
u'destinationAddress: \'{destination_address}\'']
class RubanetraIpv4ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ipv4_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Ipv4Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'internetHeaderLength: \'{internet_header_length}\'',
u'differentiatedServicesCodePoint: \'{differentiated_services_code_point}\'',
u'totalLength: \'{total_length}\'',
u'identification: \'{identification}\'',
u'flags: \'{flags}\'',
u'fragmentOffset: \'{fragment_offset}\'',
u'timeToLive: \'{time_to_live}\'',
u'headerChecksum: \'{header_checksum}\'',
u'options: \'{options}\'',
u'JNetPcapIpv4String: \'{jnetpcap_ip4}\'']
class RubanetraIpv6ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:ipv6_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Ipv6Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'trafficClass: \'{traffic_class}\'',
u'flowLabel: \'{flow_label}\'',
u'payloadLength: \'{payload_length}\'',
u'nextHeader: \'{next_header}\'',
u'hopLimit: \'{hop_limit}\'',
u'JNetPcapIpv6String: \'{jnetpcap_ip6}\'',
u'KrakenIpv6String: \'{kraken_ip6}\'']
class RubanetraMsnActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:msn_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.MsnActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'account: \'{account}\'',
u'chat: \'{chat}\'']
class RubanetraNetbiosActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:Netbios_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.NetbiosActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'datagramPacket: \'{datagram_packet}\'',
u'namePacket: \'{name_packet}\'']
class RubanetraPop3ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:pop3_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Pop3Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'subType: \'{sub_type}\'',
u'header: \'{header}\'',
u'data: \'{data}\'',
u'command: \'{command}\'',
u'response: \'{response}\'']
class RubanetraSmtpCommandActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:smtp_command_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpCommandActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'command: \'{command}\'',
u'parameter: \'{parameter}\'']
class RubanetraSmtpReplyActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:smtp_reply_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpReplyActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'code: \'{code}\'',
u'message: \'{message}\'']
class RubanetraSmtpSendActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:smtp_send_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpSendActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'header: \'{header}\'',
u'data: \'{data}\'']
class RubanetraSnmpv1ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:snmpv1_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Snmpv1Activity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'pdu: \'{pdu}\'',
u'sourceSocketAddress: \'{source_socket_address}\'',
u'destinationSocketAddress: \'{destination_socket_address}\'']
class RubanetraSnmpv2ActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:snmpv2_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.Snmpv2Activity'
FORMAT_STRING_PIECES = RubanetraSnmpv1ActivityFormatter.FORMAT_STRING_PIECES
class RubanetraTcpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:tcp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.TcpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'sourcePort: \'{source_port}\'',
u'destinationPort: \'{destination_port}\'',
u'sequenceNumber: \'{sequence_number}\'',
u'acknowledgeNumber: \'{acknowledge_number}\'',
u'relativeSequenceNumber: \'{relative_sequence_number}\'',
u'relativeAcknowledgeNumber: \'{relative_acknowledge_number}\'',
u'dataOffset: \'{data_offset}\'',
u'controlBits: \'{control_bits}\'',
u'windowSize: \'{window_size}\'',
u'checksum: \'{checksum}\'',
u'urgentPointer: \'{urgent_pointer}\'',
u'tcpLength: \'{tcp_length}\'',
u'options: \'{options}\'',
u'padding: \'{padding}\'',
u'syn: \'{syn}\'',
u'ack: \'{ack}\'',
u'psh: \'{psh}\'',
u'fin: \'{fin}\'',
u'rst: \'{rst}\'',
u'urg: \'{urg}\'',
u'direction: \'{direction}\'',
u'clientState: \'{client_state}\'',
u'serverState: \'{server_state}\'',
u'JNetPcapTcpString: \'{jnetpcap_tcp}\'',
u'sourceAddress: \'{source_address}\'',
u'destinationAddress: \'{destination_address}\'',
u'sourceSocketAddress: \'{source_socket_address}\'',
u'destinationSocketAddress: \'{destination_socket_address}\'']
class RubanetraTelnetActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:telnet_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.TelnetActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'subType: \'{sub_type}\'',
u'command: \'{command}\'',
u'option: \'{option}\'',
u'ansiMode: \'{ansi_mode}\'',
u'arguments: \'{arguments}\'',
u'text: \'{text}\'',
u'title: \'{title}\'']
class RubanetraTlsActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:tls_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.TlsActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'clientToServerTraffic: \'{client_to_server_traffic}\'',
u'serverToClientTraffic: \'{server_to_client_traffic}\'']
class RubanetraUdpActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:udp_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.UdpActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'sourcePort: \'{source_port}\'',
u'destinationPort: \'{destination_port}\'',
u'length: \'{length}\'',
u'checksum: \'{checksum}\'',
u'JNetPcapUdpString: \'{jnetpcap_udp}\'',
u'sourceSocketAddress: \'{source_socket_address}\'',
u'destinationSocketAddress: \'{destination_socket_address}\'']
class RubanetraOpenSSHActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:open_ssh_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.OpenSSHActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'clientToServerTraffic: \'{client_to_server_traffic}\'',
u'serverToClientTraffic: \'{server_to_client_traffic}\'']
class RubanetraDropboxTlsActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:dropbox_tls_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.DropboxActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'clientAddress: \'{client_address}\'',
u'serverAddress: \'{server_address}\'']
class RubanetraSpiderOakActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:spideroak_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SpiderOakActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'clientAddress: \'{client_address}\'',
u'serverAddress: \'{server_address}\'']
class RubanetraSkypePayloadActivityFormatter(RubanetraBaseActivityFormatter):
DATA_TYPE = 'java:rubanetra:skype_payload_activity'
SOURCE_LONG = 'at.jku.fim.rubanetra.SkypePayloadActivity'
FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
[u'sourceObjectId: \'{source_object_id}\'',
u'destinationObjectId: \'{destination_object_id}\'',
u'sourceHost: \'{source_host}\'',
u'destinationHost: \'{destination_host}\'']
class JavaInstantFormatter(interface.EventFormatter):
""" Formatter for a Java Instant """
DATA_TYPE = 'java:time:Instant'
SOURCE_SHORT = 'JAVA'
SOURCE_LONG = 'java.time.Instant'
FORMAT_STRING = (
    u'epoch_seconds: \'{instant_epoch_seconds}\', nano: \'{instant_nano}\'')
FORMAT_STRING_SHORT = u'{instant_epoch_seconds}.{instant_nano}'
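# Hedged usage sketch: combining the assumed Instant attributes into a
# single UTC timestamp; the values below are illustrative only.
if __name__ == '__main__':
  import datetime
  print(datetime.datetime.utcfromtimestamp(
      1417651200 + 500000000 / 1e9))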

Some files were not shown because too many files have changed in this diff