Import from old repository

This commit is contained in:
Stefan
2020-04-06 18:48:34 +02:00
commit 0da6783a45
762 changed files with 103065 additions and 0 deletions
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+280
View File
@@ -0,0 +1,280 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a helper library to read binary files."""
import binascii
import logging
import os
def ByteArrayCopyToString(byte_array, codepage='utf-8'):
  """Copies an encoded byte array into a Unicode string.

  Args:
    byte_array: A sequence of integer byte values (0-255) that contains
                an encoded string.
    codepage: The codepage of the byte stream. The default is utf-8.

  Returns:
    A Unicode string.
  """
  # bytearray() packs the integer values into a byte string on both Python 2
  # and 3. ''.join(map(chr, ...)) produces a text string on Python 3, which
  # cannot be decoded afterwards.
  byte_stream = bytes(bytearray(byte_array))
  return ByteStreamCopyToString(byte_stream, codepage=codepage)
def ByteStreamCopyToString(byte_stream, codepage='utf-8'):
  """Decodes a byte stream into a Unicode string.

  The result ends at the first NUL character, if there is one. When the
  byte stream cannot be decoded a warning is logged and the bytes that
  cannot be decoded are dropped.

  Args:
    byte_stream: A byte stream containing an encoded string.
    codepage: The codepage of the byte stream. The default is utf-8.

  Returns:
    A Unicode string.
  """
  try:
    decoded = byte_stream.decode(codepage)
  except UnicodeDecodeError:
    warning = u'Unable to decode {0:s} formatted byte stream.'.format(
        codepage)
    logging.warning(warning)
    decoded = byte_stream.decode(codepage, errors='ignore')

  # Only the part in front of the first NUL character is of interest.
  return decoded.split('\x00', 1)[0]
def ByteStreamCopyToGuid(byte_stream, byte_order='little-endian'):
  """Reads a GUID from the byte stream.

  Args:
    byte_stream: The byte stream, or sequence of integer byte values, that
                 contains the GUID in its first 16 bytes.
    byte_order: The byte order, either big- or little-endian. The default is
                little-endian.

  Returns:
    String containing the GUID, or an empty string if the byte stream holds
    less than 16 bytes or the byte order is not supported.
  """
  if len(byte_stream) < 16:
    return u''

  # Normalize to integer byte values. The 'x' format code only accepts
  # integers, while indexing a Python 2 byte string yields 1-character
  # strings.
  guid_bytes = bytearray(byte_stream[:16])

  if byte_order == 'big-endian':
    return (
        u'{{{0:02x}{1:02x}{2:02x}{3:02x}-{4:02x}{5:02x}-'
        u'{6:02x}{7:02x}-{8:02x}{9:02x}-'
        u'{10:02x}{11:02x}{12:02x}{13:02x}{14:02x}{15:02x}}}').format(
            *guid_bytes)

  if byte_order == 'little-endian':
    # The first three groups are stored byte-swapped.
    return (
        u'{{{3:02x}{2:02x}{1:02x}{0:02x}-{5:02x}{4:02x}-'
        u'{7:02x}{6:02x}-{8:02x}{9:02x}-'
        u'{10:02x}{11:02x}{12:02x}{13:02x}{14:02x}{15:02x}}}').format(
            *guid_bytes)

  return u''
def ByteStreamCopyToUtf16Stream(byte_stream, byte_stream_size=None):
  """Reads an UTF-16 formatted stream from a byte stream.

  The UTF-16 formatted stream should be terminated by an end-of-string
  character (\\x00\\x00). Otherwise the function reads up to the byte stream
  size.

  Args:
    byte_stream: The byte stream that contains the UTF-16 formatted stream.
    byte_stream_size: Optional byte stream size or None if the entire
                      byte stream should be read. The default is None.
                      A size larger than the byte stream is limited to
                      the size of the byte stream.

  Returns:
    The part of the byte stream in front of the end-of-string character,
    still UTF-16 encoded.
  """
  if not byte_stream_size:
    byte_stream_size = len(byte_stream)
  else:
    # Never scan past the data that is actually available, which would
    # raise an IndexError.
    byte_stream_size = min(byte_stream_size, len(byte_stream))

  byte_stream_index = 0
  while byte_stream_index + 1 < byte_stream_size:
    # Compare a 2-byte slice instead of single elements: indexing a byte
    # string yields integers on Python 3 and 1-character strings on Python 2.
    if byte_stream[byte_stream_index:byte_stream_index + 2] == b'\x00\x00':
      break
    byte_stream_index += 2

  return byte_stream[0:byte_stream_index]
def ReadUtf16Stream(file_object, offset=None, byte_size=0):
  """Reads an UTF-16 formatted stream from a file-like object.

  Reads an UTF-16 formatted stream that's terminated by
  an end-of-string character (\\x00\\x00) or up to the byte size.

  Args:
    file_object: A file-like object to read the data from.
    offset: An offset into the file object data, if negative (e.g. -1) or
            not set the current location into the file object data is used.
    byte_size: Maximum number of bytes to read or 0 if the function
               should keep reading up to the end of file.

  Returns:
    An Unicode string.
  """
  # Only seek for a real offset. Seeking to a negative absolute offset fails,
  # while the interface documents -1 as "use the current location".
  if offset is not None and offset >= 0:
    file_object.seek(offset, os.SEEK_SET)

  char_buffer = []
  stream_index = 0

  # The data is read one UTF-16 code unit (2 bytes) at a time.
  char_raw = file_object.read(2)
  while char_raw:
    if byte_size and stream_index >= byte_size:
      break

    # Byte string literals keep the comparison and the join below working
    # on the bytes that binary file objects return on Python 3.
    if b'\x00\x00' in char_raw:
      break

    char_buffer.append(char_raw)
    stream_index += 2
    char_raw = file_object.read(2)

  return ReadUtf16(b''.join(char_buffer))
def Ut16StreamCopyToString(byte_stream, byte_stream_size=None):
  """Copies an UTF-16 formatted byte stream to a string.

  The UTF-16 formatted byte stream should be terminated by an end-of-string
  character (\\x00\\x00). Otherwise the function reads up to the byte stream
  size.

  Args:
    byte_stream: The UTF-16 formatted byte stream.
    byte_stream_size: The byte stream size or None if the entire byte stream
                      should be used.

  Returns:
    An Unicode string.
  """
  utf16_stream = ByteStreamCopyToUtf16Stream(
      byte_stream, byte_stream_size=byte_stream_size)

  try:
    return utf16_stream.decode('utf-16-le')
  except (UnicodeDecodeError, UnicodeEncodeError) as exception:
    # The exception is formatted with "!s" since the "s" format code is not
    # supported by exception objects on Python 3 and would raise a TypeError
    # that hides the decode error.
    logging.error(u'Unable to decode string: {0:s} with error: {1!s}'.format(
        HexifyBuffer(utf16_stream), exception))

  # Fall back to a best effort decode that drops the data that is not valid.
  return utf16_stream.decode('utf-16-le', errors='ignore')
def ArrayOfUt16StreamCopyToString(byte_stream, byte_stream_size=None):
  """Copies an array of UTF-16 formatted byte streams to an array of strings.

  Every UTF-16 formatted byte stream should be terminated by an end-of-string
  character (\\x00\\x00). Data after the last end-of-string character is
  not returned.

  Args:
    byte_stream: The UTF-16 formatted byte stream.
    byte_stream_size: The byte stream size or None if the entire byte stream
                      should be used. A size larger than the byte stream
                      is limited to the size of the byte stream.

  Returns:
    An array of Unicode strings.
  """
  array_of_strings = []
  utf16_stream_start = 0
  byte_stream_index = 0

  if not byte_stream_size:
    byte_stream_size = len(byte_stream)
  else:
    # Never scan past the data that is actually available, which would
    # raise an IndexError.
    byte_stream_size = min(byte_stream_size, len(byte_stream))

  while byte_stream_index + 1 < byte_stream_size:
    # Compare a 2-byte slice instead of single elements: indexing a byte
    # string yields integers on Python 3 and 1-character strings on Python 2.
    if byte_stream[byte_stream_index:byte_stream_index + 2] == b'\x00\x00':
      # An end-of-string character that follows at most one character
      # (2 bytes) marks the end of the array.
      if byte_stream_index - utf16_stream_start <= 2:
        break

      array_of_strings.append(
          byte_stream[utf16_stream_start:byte_stream_index].decode(
              'utf-16-le'))
      utf16_stream_start = byte_stream_index + 2

    byte_stream_index += 2

  return array_of_strings
def ArrayOfUt16StreamCopyToStringTable(byte_stream, byte_stream_size=None):
  """Copies an array of UTF-16 formatted byte streams to a string table.

  The string table is a dict of strings with the byte offset as their key.
  Every UTF-16 formatted byte stream should be terminated by an end-of-string
  character (\\x00\\x00). Data after the last end-of-string character is
  not returned.

  Args:
    byte_stream: The UTF-16 formatted byte stream.
    byte_stream_size: The byte stream size or None if the entire byte stream
                      should be used. A size larger than the byte stream
                      is limited to the size of the byte stream.

  Returns:
    A dict of Unicode strings with the byte offset as their key.
  """
  string_table = {}
  utf16_stream_start = 0
  byte_stream_index = 0

  if not byte_stream_size:
    byte_stream_size = len(byte_stream)
  else:
    # Never scan past the data that is actually available, which would
    # raise an IndexError.
    byte_stream_size = min(byte_stream_size, len(byte_stream))

  while byte_stream_index + 1 < byte_stream_size:
    # Compare a 2-byte slice instead of single elements: indexing a byte
    # string yields integers on Python 3 and 1-character strings on Python 2.
    if byte_stream[byte_stream_index:byte_stream_index + 2] == b'\x00\x00':
      # An end-of-string character that follows at most one character
      # (2 bytes) marks the end of the array.
      if byte_stream_index - utf16_stream_start <= 2:
        break

      string = byte_stream[utf16_stream_start:byte_stream_index].decode(
          'utf-16-le')
      string_table[utf16_stream_start] = string
      utf16_stream_start = byte_stream_index + 2

    byte_stream_index += 2

  return string_table
def ReadUtf16(string_buffer):
  """Returns a decoded UTF-16 string from a string buffer.

  Args:
    string_buffer: A byte string, or a list or tuple of byte strings which
                   are concatenated before decoding.

  Returns:
    An Unicode string without NUL characters, or an empty string if
    the buffer is not a string.
  """
  if isinstance(string_buffer, (list, tuple)):
    # Join as bytes. Joining with a Unicode separator implicitly decodes
    # every chunk as ASCII on Python 2, which raises on bytes above 0x7f
    # before the error handling below is reached.
    use_buffer = b''.join(string_buffer)
  else:
    use_buffer = string_buffer

  # type(u'') is unicode on Python 2 and str on Python 3.
  # NOTE(review): text input relies on the implicit ASCII encoding Python 2
  # applies in decode() - confirm whether any caller still passes text.
  if not isinstance(use_buffer, (bytes, type(u''))):
    return u''

  try:
    return use_buffer.decode('utf-16').replace('\x00', '')
  except SyntaxError as exception:
    logging.error(u'Unable to decode string: {0:s} with error: {1!s}.'.format(
        HexifyBuffer(use_buffer), exception))
  except (UnicodeDecodeError, UnicodeEncodeError) as exception:
    # The exception is formatted with "!s" since the "s" format code is not
    # supported by exception objects on Python 3.
    logging.error(u'Unable to decode string: {0:s} with error: {1!s}'.format(
        HexifyBuffer(use_buffer), exception))

  # Fall back to a best effort decode that drops the data that is not valid.
  return use_buffer.decode('utf-16', errors='ignore').replace('\x00', '')
def HexifyBuffer(string_buffer):
  """Return a string with the hex representation of a string buffer.

  Args:
    string_buffer: A byte string, or a list or tuple of byte strings.

  Returns:
    An Unicode string in which every byte is represented as a backslash,
    an x and two hexadecimal digits, e.g. \\x57\\x00. An empty buffer
    results in an empty string.
  """
  if isinstance(string_buffer, (list, tuple)):
    string_buffer = b''.join(string_buffer)

  # Convert the buffer as a whole. Iterating a byte string yields integers
  # on Python 3, which hexlify does not accept.
  hexadecimal = binascii.hexlify(string_buffer).decode('ascii')
  if not hexadecimal:
    return u''

  hex_pairs = [
      hexadecimal[index:index + 2]
      for index in range(0, len(hexadecimal), 2)]
  return u'\\x{0:s}'.format(u'\\x'.join(hex_pairs))
+206
View File
@@ -0,0 +1,206 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a unit test for the binary helper in Plaso."""
import os
import unittest
from plaso.lib import binary
class BinaryTests(unittest.TestCase):
  """A unit test for the binary helper functions."""

  def setUp(self):
    """Set up the needed variables used throughout."""
    # String: "þrándur" - UTF-16 little-endian with a leading byte order
    # mark, used to test decoding of non-ASCII characters.
    self._unicode_string_1 = (
        b'\xff\xfe\xfe\x00\x72\x00\xe1\x00\x6E\x00\x64\x00\x75\x00\x72\x00')

    # String: "What\x00is".
    self._ascii_string_1 = (
        b'\x57\x00\x68\x00\x61\x00\x74\x00\x00\x00\x69\x00\x73\x00')

    # String: "What is this?".
    self._ascii_string_2 = (
        b'\x57\x00\x68\x00\x61\x00\x74\x00\x20\x00\x69\x00\x73\x00'
        b'\x20\x00\x74\x00\x68\x00\x69\x00\x73\x00\x3F\x00')

    # Show full diff results, part of TestCase so does not follow our naming
    # conventions.
    self.maxDiff = None

  def testReadUtf16Stream(self):
    """Test reading an UTF-16 stream from a file-like object."""
    path = os.path.join('test_data', 'PING.EXE-B29F6629.pf')
    with open(path, 'rb') as fh:
      # Read a null char terminated string.
      fh.seek(0x10)
      self.assertEqual(binary.ReadUtf16Stream(fh), 'PING.EXE')

      # Read a fixed size string.
      fh.seek(0x27f8)
      expected_string = u'\\DEVICE\\HARDDISKVOLUME'
      string = binary.ReadUtf16Stream(fh, byte_size=44)
      self.assertEqual(string, expected_string)

      fh.seek(0x27f8)
      expected_string = u'\\DEVICE\\HARDDISKVOLUME1'
      string = binary.ReadUtf16Stream(fh, byte_size=46)
      self.assertEqual(string, expected_string)

      # Read another null char terminated string.
      fh.seek(7236)
      self.assertEqual(
          binary.ReadUtf16Stream(fh),
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL')

  def testUt16StreamCopyToString(self):
    """Test copying an UTF-16 byte stream to a string."""
    path = os.path.join('test_data', 'PING.EXE-B29F6629.pf')
    with open(path, 'rb') as fh:
      byte_stream = fh.read()

      # Read a null char terminated string.
      self.assertEqual(
          binary.Ut16StreamCopyToString(byte_stream[0x10:]), 'PING.EXE')

      # Read a fixed size string.
      expected_string = u'\\DEVICE\\HARDDISKVOLUME'
      string = binary.Ut16StreamCopyToString(
          byte_stream[0x27f8:], byte_stream_size=44)
      self.assertEqual(string, expected_string)

      expected_string = u'\\DEVICE\\HARDDISKVOLUME1'
      string = binary.Ut16StreamCopyToString(
          byte_stream[0x27f8:], byte_stream_size=46)
      self.assertEqual(string, expected_string)

      # Read another null char terminated string.
      expected_string = (
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL')
      string = binary.Ut16StreamCopyToString(byte_stream[7236:])
      self.assertEqual(string, expected_string)

  def testArrayOfUt16StreamCopyToString(self):
    """Test copying an array of UTF-16 byte streams to strings."""
    path = os.path.join('test_data', 'PING.EXE-B29F6629.pf')
    with open(path, 'rb') as fh:
      byte_stream = fh.read()

      strings_array = binary.ArrayOfUt16StreamCopyToString(
          byte_stream[0x1c44:], byte_stream_size=2876)
      expected_strings_array = [
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNEL32.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\APISETSCHEMA.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNELBASE.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LOCALE.NLS',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\PING.EXE',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\ADVAPI32.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSVCRT.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SECHOST.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\RPCRT4.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\IPHLPAPI.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NSI.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WINNSI.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USER32.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\GDI32.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LPK.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USP10.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WS2_32.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\IMM32.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSCTF.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\EN-US\\PING.EXE.MUI',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\GLOBALIZATION\\SORTING\\'
          u'SORTDEFAULT.NLS',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSWSOCK.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHQOS.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHTCPIP.DLL',
          u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHIP6.DLL']

      self.assertEqual(strings_array, expected_strings_array)

  def testArrayOfUt16StreamCopyToStringTable(self):
    """Test copying an array of UTF-16 byte streams to a string table."""
    path = os.path.join('test_data', 'PING.EXE-B29F6629.pf')
    with open(path, 'rb') as fh:
      byte_stream = fh.read()

      string_table = binary.ArrayOfUt16StreamCopyToStringTable(
          byte_stream[0x1c44:], byte_stream_size=2876)
      expected_string_table = {
          0: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL',
          102: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNEL32.DLL',
          210: (u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\'
                u'APISETSCHEMA.DLL'),
          326: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNELBASE.DLL',
          438: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LOCALE.NLS',
          542: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\PING.EXE',
          642: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\ADVAPI32.DLL',
          750: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSVCRT.DLL',
          854: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SECHOST.DLL',
          960: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\RPCRT4.DLL',
          1064: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\IPHLPAPI.DLL',
          1172: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NSI.DLL',
          1270: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WINNSI.DLL',
          1374: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USER32.DLL',
          1478: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\GDI32.DLL',
          1580: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LPK.DLL',
          1678: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USP10.DLL',
          1780: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WS2_32.DLL',
          1884: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\IMM32.DLL',
          1986: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSCTF.DLL',
          2088: (u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\EN-US\\'
                 u'PING.EXE.MUI'),
          2208: (u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\GLOBALIZATION\\'
                 u'SORTING\\SORTDEFAULT.NLS'),
          2348: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSWSOCK.DLL',
          2454: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHQOS.DLL',
          2558: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHTCPIP.DLL',
          2666: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHIP6.DLL'}

      self.assertEqual(string_table, expected_string_table)

  def testStringParsing(self):
    """Test parsing the ASCII string."""
    self.assertEqual(binary.ReadUtf16(self._ascii_string_1), 'Whatis')

    self.assertEqual(binary.ReadUtf16(self._ascii_string_2), 'What is this?')

    uni_text = binary.ReadUtf16(self._unicode_string_1)
    self.assertEqual(uni_text, u'þrándur')

  def testHex(self):
    """Test the hexadecimal representation of data."""
    hex_string_1 = binary.HexifyBuffer(self._ascii_string_1)
    hex_compare = (
        '\\x57\\x00\\x68\\x00\\x61\\x00\\x74\\x00\\x00\\x00\\x69\\x00'
        '\\x73\\x00')
    self.assertEqual(hex_string_1, hex_compare)

    hex_string_2 = binary.HexifyBuffer(self._unicode_string_1)
    hex_compare_unicode = (
        '\\xff\\xfe\\xfe\\x00\\x72\\x00\\xe1\\x00\\x6e\\x00\\x64\\x00'
        '\\x75\\x00\\x72\\x00')
    self.assertEqual(hex_string_2, hex_compare_unicode)


if __name__ == '__main__':
  unittest.main()
+77
View File
@@ -0,0 +1,77 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains buffer related objects used in plaso."""
class CircularBuffer(object):
  """Simple circular buffer for storing EventObjects.

  Once the buffer holds size items, every additional item replaces
  the oldest one.
  """

  def __init__(self, size):
    """Initialize a fixed size circular buffer.

    Args:
      size: An integer indicating the number of elements in the buffer.
    """
    self._list = []
    self._index = 0
    self._size = size

  def __len__(self):
    """Return the fixed size of the buffer, not the number of stored items."""
    return self._size

  @property
  def size(self):
    """The fixed size of the buffer."""
    return self._size

  def GetCurrent(self):
    """Return the most recently added item or None if there is none."""
    if self._index < 1:
      return None
    return self._list[self._index - 1]

  def Clear(self):
    """Remove all items and start over at the first position."""
    self._index = 0
    self._list = []

  def __iter__(self):
    """Yield the stored items, starting at the position written to next."""
    for offset in range(0, self._size):
      position = (self._index + offset) % self._size
      # Positions that were never written to are skipped.
      if position < len(self._list):
        yield self._list[position]

  def Flush(self):
    """Return a generator for all items and clear the buffer."""
    for element in self:
      yield element

    # Only reached once the generator has been exhausted.
    self.Clear()

  def Append(self, item):
    """Add an item, replacing the oldest one when the buffer is full."""
    # Wrap around after the last position has been written.
    if self._index >= self._size:
      self._index %= self._size

    try:
      self._list[self._index] = item
    except IndexError:
      # The position has not been used before, so the list needs to grow.
      self._list.append(item)

    self._index += 1
+60
View File
@@ -0,0 +1,60 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for plaso.lib.buffer"""
import unittest
from plaso.lib import bufferlib
class TestBuffer(unittest.TestCase):
  """Test the circular buffer."""

  def testBuffer(self):
    """Test filling, wrapping around and flushing the circular buffer."""
    # A list is needed for the comparisons below: range() returns a lazy
    # object on Python 3 that never compares equal to a list.
    items = list(range(1, 11))

    circular_buffer = bufferlib.CircularBuffer(10)

    self.assertEqual(len(circular_buffer), 10)
    self.assertEqual(circular_buffer.size, 10)
    self.assertIsNone(circular_buffer.GetCurrent())

    for item in items:
      circular_buffer.Append(item)
      self.assertEqual(circular_buffer.GetCurrent(), item)
      self.assertEqual(circular_buffer.size, 10)

    content = list(circular_buffer)
    self.assertEqual(items, content)

    # Adding one more item than the buffer can hold drops the oldest item.
    circular_buffer.Append(11)
    self.assertEqual(
        [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], list(circular_buffer.Flush()))

    self.assertIsNone(circular_buffer.GetCurrent())

    new_items = range(1, 51)
    for item in new_items:
      circular_buffer.Append(item)
      self.assertEqual(circular_buffer.GetCurrent(), item)
      self.assertEqual(circular_buffer.size, 10)

    self.assertEqual(list(range(41, 51)), list(circular_buffer))


if __name__ == '__main__':
  unittest.main()
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the error classes."""
class Error(Exception):
  """Base error class, all other errors in this file derive from it."""


class BadConfigOption(Error):
  """Error for an engine that is started with a faulty parameter."""


class CollectorError(Error):
  """Collector related error."""


class NotAText(Error):
  """Error for trying to read a text on a non-text sample."""


class NoFormatterFound(Error):
  """Error for an event for which no formatter is found."""


class PathNotFound(Error):
  """Error for a preprocessor that fails to fill in a path variable."""


class PreProcessFail(Error):
  """Error for a preprocess module that is unable to gather information."""


class ProxyFailedToStart(Error):
  """Error for a proxy that could not be started."""


class QueueEmpty(Error):
  """Queue empty exception."""


class QueueFull(Error):
  """Queue full exception."""


class SameFileType(Error):
  """Error for a file that is evaluated against the same driver type."""


class SourceScannerError(Error):
  """Source scanner related error."""


class TimestampNotCorrectlyFormed(Error):
  """Error for a failure to add a timestamp to an EventObject."""


class UnableToOpenFile(Error):
  """Error for a PlasoFile class that attempts to open a file it cannot open."""


class UnableToOpenFilesystem(Error):
  """Error for a filesystem that cannot be opened."""


class UnableToParseFile(Error):
  """Error for a parser that is not designed to parse a file."""


class UserAbort(Error):
  """User initiated abort exception."""


class WrongBencodePlugin(Error):
  """Error for the use of the wrong bencode plugin."""


class WrongFilterOption(Error):
  """Error for a badly formed filter option."""


class WrongFormatter(Error):
  """Error for a formatter that is not applicable for a particular event."""


class WrongPlistPlugin(Error):
  """Error for the use of the wrong plist plugin."""


class WrongPlugin(Error):
  """Error for a plugin of the wrong type."""


class WrongProtobufEntry(Error):
  """Error for an EventObject that cannot be serialized as a protobuf."""


class WinRegistryValueError(Error):
  """Error for an issue reading a registry value."""
+478
View File
@@ -0,0 +1,478 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The core object definitions, e.g. the event object."""
import collections
import logging
import uuid
from plaso.formatters import manager as formatters_manager
from plaso.lib import timelib
from plaso.lib import utils
import pytz
class AnalysisReport(object):
  """Class that defines an analysis report.

  The plugin_name attribute is not set by the initializer, it has to be
  assigned before GetString is called. The optional time_compiled and
  filter_string attributes are included in the report string when set.
  """

  def __init__(self):
    """Initializes the analysis report."""
    super(AnalysisReport, self).__init__()
    self._anomalies = []
    self._tags = []

  def __unicode__(self):
    """Return an unicode string representation of the report."""
    return self.GetString()

  def GetAnomalies(self):
    """Retrieves the list of anomalies that are attached to the report."""
    return self._anomalies

  def GetString(self):
    """Return an unicode string representation of the report."""
    # TODO: Make this a more complete function that includes images
    # and the option of saving as a full fledged HTML document.
    string_list = []
    string_list.append(u'Report generated from: {0:s}'.format(self.plugin_name))

    time_compiled = getattr(self, 'time_compiled', 0)
    if time_compiled:
      time_compiled = timelib.Timestamp.CopyToIsoFormat(time_compiled)
      string_list.append(u'Generated on: {0:s}'.format(time_compiled))

    filter_string = getattr(self, 'filter_string', '')
    if filter_string:
      string_list.append(u'Filter String: {0:s}'.format(filter_string))

    string_list.append(u'')
    string_list.append(u'Report text:')
    # The text only exists after SetText was called, a report without text
    # is represented with an empty body instead of raising AttributeError.
    string_list.append(getattr(self, 'text', u''))

    return u'\n'.join(string_list)

  def GetTags(self):
    """Retrieves the list of event tags that are attached to the report."""
    return self._tags

  # TODO: rename text to body?
  def SetText(self, lines_of_text):
    """Sets the text based on a list of lines of text.

    The list that is passed in is not modified.

    Args:
      lines_of_text: a list containing lines of text.
    """
    # Append one empty string to make sure a new line is added to the last
    # line of text as well. This is done on a copy so that the list of
    # the caller does not grow on every call.
    self.text = u'\n'.join(list(lines_of_text) + [u''])
# TODO: Re-design the event object to make it lighter, perhaps template
# based. The current design is too slow and needs to be improved.
class EventObject(object):
"""An event object is the main datastore for an event in plaso.
The framework is designed to parse files and create an event
from every single record, line or key extracted from the file.
An EventObject is the main data storage for an event in plaso.
This class defines the high level interface of EventObject.
Before creating an EventObject a class needs to be implemented
that inherits from EventObject and implements the functions in it.
The EventObject is then used by output processing for saving
in other forms, such as a protobuff, AFF4 container, CSV files,
databases, etc.
The goal of the EventObject is to provide a easily extensible
data storage of each events internally in the tool.
The main EventObject only exposes those functions that the
implementations need to implement. The functions that are needed
simply provide information about the event, or describe the
attributes that are necessary. How they are assembled is totally
up to the implementation.
All required attributes of the EventObject are passed to the
constructor of the object while the optional ones are set
using the method SetValue(attribute, value).
"""
# This is a convenience variable to define event object as
# simple value objects. Its runtime equivalent data_type
# should be used in code logic.
DATA_TYPE = ''
# This is a reserved variable just used for comparison operation and defines
# attributes that should not be used during evaluation of whether two
# EventObjects are the same.
COMPARE_EXCLUDE = frozenset([
'timestamp', 'inode', 'pathspec', 'filename', 'uuid',
'data_type', 'display_name', 'store_number', 'store_index', 'tag'])
def __init__(self):
  """Initializes the event object.

  Assigns a random UUID, as a string of 32 hexadecimal digits, to the uuid
  attribute. The data_type attribute is only set when the class defines
  a non-empty DATA_TYPE.
  """
  # The hex attribute is available on both Python 2 and 3, while get_hex()
  # only exists on Python 2.
  self.uuid = uuid.uuid4().hex
  if self.DATA_TYPE:
    self.data_type = self.DATA_TYPE
def EqualityString(self):
  """Return a string describing the EventObject in terms of object equality.

  The details of this function must match the logic of __eq__. EqualityStrings
  of two event objects should be the same if and only if the EventObjects are
  equal as described in __eq__.

  The string consists of the timestamp, the data type and the name and value
  of every attribute that is not in COMPARE_EXCLUDE, separated by pipes.

  Returns:
    String: will match another EventObject's Equality String if and only if
            the EventObjects are equal
  """
  fields = sorted(self.GetAttributes().difference(self.COMPARE_EXCLUDE))

  # TODO: Review this (after 1.1.0 release). Is there a better/more clean
  # method of removing the timestamp description field out of the fields list?
  parser = getattr(self, 'parser', u'')
  is_filestat = parser == u'filestat'
  if is_filestat and 'timestamp_desc' in fields:
    # We don't want to compare the timestamp description field when comparing
    # filestat events. This is done to be able to join together FILE events
    # that have the same timestamp, yet different description field (as in an
    # event that has for instance the same timestamp for mtime and atime,
    # joining it together into a single event).
    fields.remove('timestamp_desc')

  identity = [self.timestamp, self.data_type]
  for attribute in fields:
    value = getattr(self, attribute)
    # Dictionaries and sets are sorted so that their string representation
    # does not depend on their iteration order.
    if type(value) is dict:
      value = sorted(value.items())
    elif type(value) is set:
      value = sorted(value)
    identity.extend((attribute, value))

  if is_filestat:
    inode = getattr(self, 'inode', 'a')
    if inode == 'a':
      # Without an inode the string is made unique with a random UUID, so
      # that it does not match the string of any other event.
      inode = '_' + str(uuid.uuid4())
    identity.extend(('inode', inode))

  # The "!s" conversion of an Unicode format string is the equivalent of
  # unicode() on Python 2 and of str() on Python 3.
  return u'|'.join(u'{0!s}'.format(item) for item in identity)
def __eq__(self, event_object):
  """Return a boolean indicating if two EventObject are considered equal.

  Two EventObject objects are considered to represent the same event when
  all of the following holds:
    + They have the same timestamp.
    + They have the same data_type value.
    + They define the same set of attribute names.
    + The values of all attributes that are not named in COMPARE_EXCLUDE
      are equal.
    + For events whose parser attribute contains 'filestat': they have
      the same inode. An event without an inode never matches.

  Args:
    event_object: The EventObject that is being compared to this one.

  Returns:
    True: if both EventObjects are considered equal, otherwise False.
  """
  # Note: if this method changes, the EqualityString method MUST be
  # updated as well
  if not isinstance(event_object, EventObject):
    return False

  if (self.timestamp != event_object.timestamp or
      self.data_type != event_object.data_type):
    return False

  attribute_names = self.GetAttributes()
  if attribute_names != event_object.GetAttributes():
    return False

  # Here we have to deal with "near" duplicates, so not all attributes
  # should be compared.
  compared_names = attribute_names.difference(self.COMPARE_EXCLUDE)
  if any(getattr(self, name) != getattr(event_object, name)
         for name in compared_names):
    return False

  if 'filestat' not in getattr(self, 'parser', ''):
    return True

  # If we are dealing with the stat parser the inode number is the one
  # attribute that really matters, unlike others. The different default
  # values make sure that events without an inode are never equal.
  own_inode = utils.GetUnicodeString(getattr(self, 'inode', 'a'))
  other_inode = utils.GetUnicodeString(getattr(event_object, 'inode', 'b'))
  return own_inode == other_inode
def GetAttributes(self):
  """Return a set with the names of all attributes set on the instance."""
  attribute_names = set(self.__dict__)
  return attribute_names
def GetValues(self):
  """Retrieves the defined attributes together with their values.

  Returns:
    A dictionary that maps every name returned by GetAttributes() to the
    value of that attribute.
  """
  return dict(
      (name, getattr(self, name)) for name in self.GetAttributes())
def GetString(self):
  """Retrieves the Unicode string representation of the event object.

  Returns:
    The Unicode string produced by converting this object with unicode().
  """
  unicode_string = unicode(self)
  return unicode_string
def __str__(self):
  """Retrieves the event object as an UTF-8 encoded byte string.

  Returns:
    The Unicode representation of this object, encoded as UTF-8.
  """
  unicode_string = unicode(self)
  return unicode_string.encode('utf-8')
def __unicode__(self):
"""Builds a human readable, multi-line Unicode description of the event.

The description is assembled from: a separator line, the timestamp in ISO
format, the message and source strings provided by the event formatter (or
'None' when no formatter was found), the comparable form of the path
specification when the pathspec attribute is set, and the attribute values
split into those named in utils.RESERVED_VARIABLES and all others.

Returns:
A Unicode string describing the event object.
"""
# Lines of the first part of the output; joined with newlines at the end.
out_write = []
out_write.append(u'+-' * 40)
out_write.append(u'[Timestamp]:\n {0:s}'.format(
timelib.Timestamp.CopyToIsoFormat(self.timestamp)))
out_write.append(u'\n[Message Strings]:')
# TODO: move formatting testing to a formatters (manager) test.
event_formatter = formatters_manager.EventFormatterManager.GetFormatter(
self)
if not event_formatter:
out_write.append(u'None')
else:
msg, msg_short = event_formatter.GetMessages(self)
source_short, source_long = event_formatter.GetSources(self)
out_write.append(u'{2:>7}: {0}\n{3:>7}: {1}\n'.format(
utils.GetUnicodeString(msg_short), utils.GetUnicodeString(msg),
'Short', 'Long'))
out_write.append(u'{2:>7}: {0}\n{3:>7}: {1}\n'.format(
utils.GetUnicodeString(source_short),
utils.GetUnicodeString(source_long), 'Source Short', 'Source Long'))
if hasattr(self, 'pathspec'):
pathspec_string = self.pathspec.comparable
out_write.append(u'[Pathspec]:\n {0:s}\n'.format(
pathspec_string.replace('\n', '\n ')))
# The reserved attributes extend the first part of the output, all other
# attributes are collected separately and form the second part.
out_additional = []
out_write.append(u'[Reserved attributes]:')
out_additional.append(u'[Additional attributes]:')
for attr_key, attr_value in sorted(self.GetValues().items()):
if attr_key in utils.RESERVED_VARIABLES:
if attr_key == 'pathspec':
# The path specification is written in its own section above.
continue
else:
out_write.append(
u' {{{key}}} {value}'.format(key=attr_key, value=attr_value))
else:
out_additional.append(
u' {{{key}}} {value}'.format(key=attr_key, value=attr_value))
out_write.append(u'\n')
out_additional.append(u'')
part_1 = u'\n'.join(out_write)
part_2 = u'\n'.join(out_additional)
return part_1 + part_2
class EventTag(object):
  """A native Python object for the EventTagging protobuf.

  The EventTag object should have the following attributes:
  (optional attributes surrounded with brackets)
    + store_number: An integer, pointing to the store the EventObject is.
    + store_index: An index into the store where the EventObject is.
    + event_uuid: An UUID value of the event this tag belongs to.
    + [comment]: An arbitrary string containing comments about the event.
    + [color]: A string containing color information.
    + [tags]: A list of strings with tags, eg: 'Malware', 'Entry Point'.

  To be valid the tag needs to have either an event_uuid or both a
  store_number and a store_index defined.
  """

  # TODO: Enable __slots__ once we tested the first round of changes.

  @property
  def string_key(self):
    """Retrieves the string index key of this tag.

    Returns:
      An empty string if the tag is not valid for serialization, the event
      UUID if one is set, otherwise '<store_number>:<store_index>'.
    """
    if self.IsValidForSerialization():
      event_uuid = getattr(self, 'event_uuid', None)
      if event_uuid:
        return event_uuid
      return u'{}:{}'.format(self.store_number, self.store_index)

    return ''

  def GetString(self):
    """Retrieves a string representation of the tag.

    Returns:
      A multi-line Unicode string with the store (or UUID) information and
      the comment, color and tags for as far as these are set.
    """
    lines = [u'-' * 50]

    # A set store number takes precedence over the event UUID here.
    if getattr(self, 'store_number', 0):
      lines.append(u'{0:>7}:\n\tNumber: {1}\n\tIndex: {2}'.format(
          'Store', self.store_number, self.store_index))
    else:
      lines.append(u'{0:>7}:\n\tUUID: {1}'.format('Store', self.event_uuid))

    for attribute_name, label in (('comment', 'Comment'), ('color', 'Color')):
      if hasattr(self, attribute_name):
        lines.append(
            u'{:>7}: {}'.format(label, getattr(self, attribute_name)))

    if hasattr(self, 'tags'):
      lines.append(u'{:>7}: {}'.format('Tags', u','.join(self.tags)))

    return u'\n'.join(lines)

  def IsValidForSerialization(self):
    """Determines whether or not this is a valid tag object.

    Returns:
      True if the tag has an event UUID or has both a store number and a
      non-negative store index, False otherwise.
    """
    if getattr(self, 'event_uuid', None):
      return True

    has_store_number = getattr(self, 'store_number', 0)
    return bool(has_store_number and getattr(self, 'store_index', -1) >= 0)
class PreprocessObject(object):
  """Container for the information gathered during preprocessing."""

  def __init__(self):
    """Initializes the preprocess object with the UTC timezone."""
    super(PreprocessObject, self).__init__()
    self._user_ids_to_names = None
    self.zone = pytz.UTC

  def GetUserMappings(self):
    """Retrieves the mapping of user identifiers to usernames.

    The mapping is built from the users attribute, if set, and is kept for
    subsequent calls once it contains at least one entry.

    Returns:
      A dictionary that maps SIDs or UIDs to usernames. A user without a
      name is mapped to its own identifier.
    """
    if self._user_ids_to_names is None:
      self._user_ids_to_names = {}

    if not self._user_ids_to_names:
      for user in getattr(self, 'users', []):
        # The SID takes precedence over the UID when both are present.
        identifier = u''
        for key in ('sid', 'uid'):
          if key in user:
            identifier = user.get(key, u'')
            break

        if identifier:
          self._user_ids_to_names[identifier] = user.get('name', identifier)

    return self._user_ids_to_names

  def GetUsernameById(self, user_id):
    """Retrieves the username for a specific user identifier.

    Args:
      user_id: The user identifier, either a SID or UID.

    Returns:
      If available the user name for the identifier, otherwise the string '-'.
    """
    return self.GetUserMappings().get(user_id, '-')

  # TODO: change to property with getter and setter.
  def SetTimezone(self, timezone_identifier):
    """Sets the timezone.

    An unknown timezone identifier is logged as a warning and leaves the
    current timezone unchanged.

    Args:
      timezone_identifier: string containing the identifier of the timezone,
                           e.g. 'UTC' or 'Iceland'.
    """
    try:
      timezone = pytz.timezone(timezone_identifier)
    except pytz.UnknownTimeZoneError as exception:
      logging.warning(
          u'Unable to set timezone: {0:s} with error: {1:s}.'.format(
              timezone_identifier, exception))
      return

    self.zone = timezone

  def SetCollectionInformationValues(self, dict_object):
    """Sets the collection information values.

    A copy of the dictionary is stored; its 'configure_zone' value, when
    present, is replaced by the corresponding pytz timezone object.

    Args:
      dict_object: dictionary object containing the collection information
                   values.
    """
    information = dict(dict_object)
    self.collection_information = information
    if 'configure_zone' in information:
      information['configure_zone'] = pytz.timezone(
          information['configure_zone'])

  def SetCounterValues(self, dict_object):
    """Sets the counter values.

    Args:
      dict_object: dictionary object containing the counter values.
    """
    counter = collections.Counter()
    self.counter = counter
    for name, count in dict_object.iteritems():
      counter[name] = count

  def SetPluginCounterValues(self, dict_object):
    """Sets the plugin counter values.

    Args:
      dict_object: dictionary object containing the plugin counter values.
    """
    plugin_counter = collections.Counter()
    self.plugin_counter = plugin_counter
    for name, count in dict_object.iteritems():
      plugin_counter[name] = count
# Named tuple that defines a parse error.
#
# Attributes:
#   name: The parser or plugin name.
#   description: The description of the error.
#   path_spec: Optional path specification of the file entry (instance of
#              dfvfs.PathSpec). The default is None.
ParseError = collections.namedtuple(
    'ParseError', 'name description path_spec')

# Named tuple fields have no default values of their own, hence the default
# of the last field (path_spec) is attached to the generated constructor so
# that it is optional as documented.
ParseError.__new__.__defaults__ = (None,)
+324
View File
@@ -0,0 +1,324 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a unit test for the EventObject.
This is an implementation of a unit test for the EventObject storage
mechanism of plaso.
The test consists of creating six EventObjects.
Error handling. The following tests are performed for error handling:
+ Access attributes that are not set.
"""
import unittest
from plaso.events import text_events
from plaso.events import windows_events
from plaso.lib import event
from plaso.lib import timelib_test
class TestEvent1(event.EventObject):
  """Test event object that is initialized from a dictionary of attributes."""

  DATA_TYPE = 'test:event1'

  def __init__(self, timestamp, attributes):
    """Initializes the test event object.

    Args:
      timestamp: the timestamp value of the event object.
      attributes: a dictionary with the names and values of the additional
                  attributes to set on the event object.
    """
    super(TestEvent1, self).__init__()
    self.timestamp = timestamp
    self.timestamp_desc = 'Some time in the future'
    for name, value in attributes.iteritems():
      setattr(self, name, value)
class FailEvent(event.EventObject):
  """A test event object that lacks the minimal required initialization."""
def GetEventObjects():
  """Creates the event objects used for testing.

  Returns:
    A list of event objects that consists of, in order: three Windows
    Registry events, a plain event object, three TestEvent1 event objects
    and a text event.
  """
  hostname = 'MYHOSTNAME'
  filename = 'c:/Temp/evil.exe'
  copy_to_timestamp = timelib_test.CopyStringToTimestamp

  plain_event = event.EventObject()
  plain_event.username = 'joesmith'
  plain_event.filename = 'c:/Users/joesmith/NTUSER.DAT'
  plain_event.hostname = hostname
  plain_event.timestamp = 0
  plain_event.data_type = 'test:event1'

  event_objects = []

  # TODO: move this to a WindowsRegistryEvent unit test.
  registry_definitions = [
      ('2012-04-20 22:38:46.929596', u'MY AutoRun key',
       {u'Run': u'c:/Temp/evil.exe'}),
      ('2012-04-20 23:56:46.929596',
       u'//HKCU/Secret/EvilEmpire/Malicious_key',
       {u'Value': u'send all the exes to the other world'}),
      ('2012-04-20 16:44:46.000000', u'//HKCU/Windows/Normal',
       {u'Value': u'run all the benign stuff'})]

  for time_string, key_name, values in registry_definitions:
    registry_event = windows_events.WindowsRegistryEvent(
        copy_to_timestamp(time_string), key_name, values)
    registry_event.hostname = hostname
    event_objects.append(registry_event)

  # The plain event object is created first but added after the Windows
  # Registry events.
  event_objects.append(plain_event)

  test_event_definitions = [
      ('2012-04-30 10:29:47.929596', 'This log line reads ohh so much.'),
      ('2012-04-30 10:29:47.929596', 'Nothing of interest here, move on.'),
      ('2012-04-30 13:06:47.939596',
       'Mr. Evil just logged into the machine and got root.')]

  for time_string, text in test_event_definitions:
    test_event = TestEvent1(copy_to_timestamp(time_string), {'text': text})
    test_event.filename = filename
    test_event.hostname = hostname
    event_objects.append(test_event)

  text_dict = {
      'body': (
          u'This is a line by someone not reading the log line properly. And '
          u'since this log line exceeds the accepted 80 chars it will be '
          u'shortened.'),
      'hostname': u'nomachine',
      'username': u'johndoe'}

  # TODO: move this to a TextEvent unit test.
  text_event = text_events.TextEvent(
      copy_to_timestamp('2012-06-05 22:14:19.000000'), 12, text_dict)
  text_event.text = text_event.body
  text_event.hostname = hostname
  text_event.filename = filename
  event_objects.append(text_event)

  return event_objects
class EventObjectTest(unittest.TestCase):
"""Tests for the event object."""
def testSameEvent(self):
"""Test the EventObject comparison."""
event_a = event.EventObject()
event_b = event.EventObject()
event_c = event.EventObject()
event_d = event.EventObject()
event_e = event.EventObject()
event_a.timestamp = 123
event_a.timestamp_desc = u'LAST WRITTEN'
event_a.data_type = 'mock:nothing'
event_a.inode = 124
event_a.filename = u'c:/bull/skrytinmappa/skra.txt'
event_a.another_attribute = False
event_a.metadata = {
u'author': u'Some Random Dude',
u'version': 1245L,
u'last_changed': u'Long time ago'}
event_a.strings = [
u'This ', u'is a ', u'long string']
event_b.timestamp = 123
event_b.timestamp_desc = 'LAST WRITTEN'
event_b.data_type = 'mock:nothing'
event_b.inode = 124
event_b.filename = 'c:/bull/skrytinmappa/skra.txt'
event_b.another_attribute = False
event_b.metadata = {
'author': 'Some Random Dude',
'version': 1245L,
'last_changed': 'Long time ago'}
event_b.strings = [
'This ', 'is a ', 'long string']
event_c.timestamp = 123
event_c.timestamp_desc = 'LAST UPDATED'
event_c.data_type = 'mock:nothing'
event_c.inode = 124
event_c.filename = 'c:/bull/skrytinmappa/skra.txt'
event_c.another_attribute = False
event_d.timestamp = 14523
event_d.timestamp_desc = 'LAST WRITTEN'
event_d.data_type = 'mock:nothing'
event_d.inode = 124
event_d.filename = 'c:/bull/skrytinmappa/skra.txt'
event_d.another_attribute = False
event_e.timestamp = 123
event_e.timestamp_desc = 'LAST WRITTEN'
event_e.data_type = 'mock:nothing'
event_e.inode = 623423
event_e.filename = 'c:/afrit/onnurskra.txt'
event_e.another_attribute = False
event_e.metadata = {
'author': 'Some Random Dude',
'version': 1245,
'last_changed': 'Long time ago'}
event_e.strings = [
'This ', 'is a ', 'long string']
self.assertEquals(event_a, event_b)
self.assertNotEquals(event_a, event_c)
self.assertEquals(event_a, event_e)
self.assertNotEquals(event_c, event_d)
def testEqualityString(self):
"""Test the EventObject EqualityString."""
event_a = event.EventObject()
event_b = event.EventObject()
event_c = event.EventObject()
event_d = event.EventObject()
event_e = event.EventObject()
event_f = event.EventObject()
event_a.timestamp = 123
event_a.timestamp_desc = 'LAST WRITTEN'
event_a.data_type = 'mock:nothing'
event_a.inode = 124
event_a.filename = 'c:/bull/skrytinmappa/skra.txt'
event_a.another_attribute = False
event_b.timestamp = 123
event_b.timestamp_desc = 'LAST WRITTEN'
event_b.data_type = 'mock:nothing'
event_b.inode = 124
event_b.filename = 'c:/bull/skrytinmappa/skra.txt'
event_b.another_attribute = False
event_c.timestamp = 123
event_c.timestamp_desc = 'LAST UPDATED'
event_c.data_type = 'mock:nothing'
event_c.inode = 124
event_c.filename = 'c:/bull/skrytinmappa/skra.txt'
event_c.another_attribute = False
event_d.timestamp = 14523
event_d.timestamp_desc = 'LAST WRITTEN'
event_d.data_type = 'mock:nothing'
event_d.inode = 124
event_d.filename = 'c:/bull/skrytinmappa/skra.txt'
event_d.another_attribute = False
event_e.timestamp = 123
event_e.timestamp_desc = 'LAST WRITTEN'
event_e.data_type = 'mock:nothing'
event_e.inode = 623423
event_e.filename = 'c:/afrit/öñṅûŗ₅ḱŖūα.txt'
event_e.another_attribute = False
event_f.timestamp = 14523
event_f.timestamp_desc = 'LAST WRITTEN'
event_f.data_type = 'mock:nothing'
event_f.inode = 124
event_f.filename = 'c:/bull/skrytinmappa/skra.txt'
event_f.another_attribute = False
event_f.weirdness = 'I am a potato'
self.assertEquals(event_a.EqualityString(), event_b.EqualityString())
self.assertNotEquals(event_a.EqualityString(), event_c.EqualityString())
self.assertEquals(event_a.EqualityString(), event_e.EqualityString())
self.assertNotEquals(event_c.EqualityString(), event_d.EqualityString())
self.assertNotEquals(event_d.EqualityString(), event_f.EqualityString())
def testEqualityFileStatParserMissingInode(self):
"""Test that FileStatParser files with missing inodes are distinct"""
event_a = event.EventObject()
event_b = event.EventObject()
event_a.timestamp = 123
event_a.timestamp_desc = 'LAST WRITTEN'
event_a.data_type = 'mock:nothing'
event_a.parser = 'filestat'
event_a.filename = 'c:/bull/skrytinmappa/skra.txt'
event_a.another_attribute = False
event_b.timestamp = 123
event_b.timestamp_desc = 'LAST WRITTEN'
event_b.data_type = 'mock:nothing'
event_b.parser = 'filestat'
event_b.filename = 'c:/bull/skrytinmappa/skra.txt'
event_b.another_attribute = False
self.assertNotEquals(event_a, event_b)
def testEqualityStringFileStatParserMissingInode(self):
"""Test that FileStatParser files with missing inodes are distinct"""
event_a = event.EventObject()
event_b = event.EventObject()
event_a.timestamp = 123
event_a.timestamp_desc = 'LAST WRITTEN'
event_a.data_type = 'mock:nothing'
event_a.parser = 'filestat'
event_a.filename = 'c:/bull/skrytinmappa/skra.txt'
event_a.another_attribute = False
event_b.timestamp = 123
event_b.timestamp_desc = 'LAST WRITTEN'
event_b.data_type = 'mock:nothing'
event_b.parser = 'filestat'
event_b.filename = 'c:/bull/skrytinmappa/skra.txt'
event_b.another_attribute = False
self.assertNotEquals(event_a.EqualityString(), event_b.EqualityString())
def testNotInEventAndNoParent(self):
"""Call to an attribute that does not exist."""
event_object = TestEvent1(0, {})
with self.assertRaises(AttributeError):
getattr(event_object, 'doesnotexist')
def testFailEvent(self):
"""Calls to format_string_short that has not been defined."""
e = FailEvent()
with self.assertRaises(AttributeError):
getattr(e, 'format_string_short')
# Only run the unit tests when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()
+65
View File
@@ -0,0 +1,65 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A place to store information about events, such as format strings, etc."""
# TODO: move this class to events/definitions.py or equiv.
class EventTimestamp(object):
  """Definitions of the timestamp description (timestamp_desc) values."""

  # File and metadata related timestamp descriptions.
  ACCESS_TIME = u'Last Access Time'
  CHANGE_TIME = u'Metadata Modification Time'
  CREATION_TIME = u'Creation Time'
  MODIFICATION_TIME = u'Content Modification Time'
  ENTRY_MODIFICATION_TIME = u'Metadata Modification Time'
  DELETED_TIME = u'Content Deletion Time'

  # Added time and Creation time are considered the same.
  ADDED_TIME = CREATION_TIME

  # Written time and Modification time are considered the same.
  WRITTEN_TIME = MODIFICATION_TIME

  # Execution related timestamp descriptions.
  EXIT_TIME = u'Exit Time'
  LAST_RUNTIME = u'Last Time Executed'
  START_TIME = u'Start Time'
  END_TIME = u'End Time'
  LAST_SHUTDOWN = u'Last Shutdown Time'
  LAST_RESUME_TIME = u'Last Resume Time'

  # Download and browsing related timestamp descriptions.
  FILE_DOWNLOADED = u'File Downloaded'
  PAGE_VISITED = u'Page Visited'
  # TODO: change page visited into last visited time.
  LAST_VISITED_TIME = u'Last Visited Time'
  LAST_CHECKED_TIME = u'Last Checked Time'
  EXPIRATION_TIME = u'Expiration Time'

  # Account and connection related timestamp descriptions.
  ACCOUNT_CREATED = u'Account Created'
  LAST_LOGIN_TIME = u'Last Login Time'
  LAST_PASSWORD_RESET = u'Last Password Reset'
  FIRST_CONNECTED = u'First Connection Time'
  LAST_CONNECTED = u'Last Connection Time'

  LAST_PRINTED = u'Last Printed Time'

  # Note that the unknown time is used for date and time values
  # of which the exact meaning is unknown and being researched.
  # For most cases do not use this timestamp description.
  UNKNOWN = u'Unknown Time'
+94
View File
@@ -0,0 +1,94 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A definition of the filter interface for filters in plaso."""
import abc
from plaso.lib import errors
from plaso.lib import registry
class FilterObject(object):
  """The interface that each filter needs to implement in plaso."""

  __metaclass__ = registry.MetaclassRegistry
  __abstract = True

  @property
  def filter_name(self):
    """The name of the filter, which is the name of its class."""
    return type(self).__name__

  @property
  def last_decision(self):
    """The last matching decision, or None if no decision was stored."""
    return getattr(self, '_decision', None)

  @property
  def last_reason(self):
    """The reason for the last match, or None if there was no match.

    An empty string is returned if the last decision was a match but no
    reason was stored.
    """
    if not getattr(self, 'last_decision', False):
      return None
    return getattr(self, '_reason', '')

  @property
  def fields(self):
    """A list of fields for adaptive output modules, empty by default."""
    return []

  @property
  def separator(self):
    """The separator for adaptive output modules, a comma by default."""
    return ','

  @property
  def limit(self):
    """The maximum number of records to return, or zero for all records."""
    return 0

  @abc.abstractmethod
  def CompileFilter(self, unused_filter_string):
    """Verifies the filter string and prepares the filter for later usage.

    This function verifies that the filter string matches the definition of
    the class and, if necessary, compiles or prepares the filter so it can
    start matching against passed in EventObjects.

    Args:
      unused_filter_string: A string passed in that should be recognized by
                            the filter class.

    Raises:
      errors.WrongPlugin: If this filter string does not match the filter
                          class.
    """
    raise errors.WrongPlugin('Not the correct filter for this string.')

  def Match(self, unused_event_object):
    """Compares an EventObject to the filter expression.

    This default implementation does not match any event object.

    Args:
      unused_event_object: An event object (instance of EventObject) that
                           should be evaluated against the filter.

    Returns:
      Boolean indicating whether the filter matches the object or not.
    """
    return False
+514
View File
@@ -0,0 +1,514 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An LL(1) lexer. This lexer is very tolerant of errors and can resync.
This lexer is originally copied from the GRR project:
https://code.google.com/p/grr
"""
import logging
import re
class Token(object):
  """A token action: a rule the lexer tries to match in a certain state."""

  def __init__(self, state_regex, regex, actions, next_state, flags=re.I):
    """Initializes the token object.

    Args:
      state_regex: If this regular expression matches the current state this
                   rule is considered.
      regex: A regular expression to try and match from the current point.
      actions: A comma separated list of method names in the Lexer to call.
      next_state: The next state we transition to if this Token matches.
      flags: re flags.
    """
    # Both expressions are compiled with the same set of flags.
    regex_flags = re.DOTALL | re.M | re.S | re.U | flags

    self.state_regex = re.compile(state_regex, regex_flags)
    self.regex = re.compile(regex, regex_flags)
    self.re_str = regex
    self.actions = actions.split(',') if actions else []
    self.next_state = next_state

  def Action(self, lexer):
    """Method is called when the token matches."""
class Error(Exception):
  """Base class of the exceptions defined by this module."""
class ParseError(Error):
  """Exception raised when a parse error occurred."""
class Lexer(object):
  """A generic feed lexer.

  The lexer keeps a buffer of unprocessed data and a current state. Every
  call to NextToken() tries the Token rules defined in tokens, in order,
  and invokes the actions of the first rule that applies.
  """

  _CONTINUE_STATE = 'CONTINUE'
  _INITIAL_STATE = 'INITIAL'

  _ERROR_TOKEN = 'Error'

  # A list of Token() instances.
  tokens = []

  def __init__(self, data=''):
    """Initializes the lexer object.

    Args:
      data: Optional initial data of the buffer. The default is an empty
            string.
    """
    super(Lexer, self).__init__()
    self.buffer = data
    self.error = 0
    self.flags = 0
    self.processed = 0
    self.processed_buffer = ''
    self.state = self._INITIAL_STATE
    self.state_stack = []
    self.verbose = 0

  def NextToken(self):
    """Fetches the next token by trying to match any of the regexes in order.

    Returns:
      The token (instance of Token) that matched, or the error token string
      ('Error') if none of the rules matched. In the latter case the error
      count is increased and a single character of the buffer is skipped.
    """
    current_state = self.state
    for token in self.tokens:
      # Does the rule apply to us?
      if not token.state_regex.match(current_state):
        continue

      # Try to match the rule
      m = token.regex.match(self.buffer)
      if not m:
        continue

      # The match consumes the data off the buffer (the handler can put it
      # back if it likes)
      # TODO: using joins might be more efficient here.
      self.processed_buffer += self.buffer[:m.end()]
      self.buffer = self.buffer[m.end():]
      self.processed += m.end()

      next_state = token.next_state
      for action in token.actions:
        # Is there a callback to handle this action?
        callback = getattr(self, action, self.Default)

        # A callback can return the continue state to leave the next state
        # untouched, or another state to override the one of the token.
        try:
          possible_next_state = callback(string=m.group(0), match=m)
          if possible_next_state == self._CONTINUE_STATE:
            continue
          # Override the state from the Token
          elif possible_next_state:
            next_state = possible_next_state
        except ParseError as exception:
          self.Error(exception)

      # Update the next state
      if next_state:
        self.state = next_state

      return token

    # None of the rules matched: record the error and skip a single
    # character so that the lexer can resync on the remaining data.
    self.Error(u'Expected {0:s}'.format(self.state))
    self.processed_buffer += self.buffer[:1]
    self.buffer = self.buffer[1:]
    return self._ERROR_TOKEN

  def Feed(self, data):
    """Feeds the buffer with data.

    Args:
      data: The data to append to the buffer.
    """
    self.buffer = ''.join([self.buffer, data])

  def Empty(self):
    """Returns a boolean indicating if the buffer is empty."""
    return not self.buffer

  def Default(self, **kwarg):
    """The default callback handler, which only logs its arguments."""
    # The keyword arguments are a dictionary, which does not support the
    # 's' format specification on every Python version, hence the explicit
    # string conversion.
    logging.debug(u'Default handler: {0!s}'.format(kwarg))

  def Error(self, message=None, weight=1):
    """Logs an error and adds its weight to the error count.

    Args:
      message: Optional error message, which can be a string, an exception
               or None. The default is None.
      weight: Optional value to add to the error count. The default is 1.
    """
    # The message is converted explicitly since exceptions and None do not
    # support the 's' format specification on every Python version.
    logging.debug(u'Error({0:d}): {1!s}'.format(weight, message))
    # Keep a count of errors
    self.error += weight

  def PushState(self, **_):
    """Pushes the current state on the state stack."""
    logging.debug(u'Storing state {0:s}'.format(repr(self.state)))
    self.state_stack.append(self.state)

  def PopState(self, **_):
    """Pops the previous state from the stack.

    Returns:
      The state that was restored, or None if the state stack was empty, in
      which case an error is recorded instead.
    """
    try:
      self.state = self.state_stack.pop()
      logging.debug(u'Returned state to {0:s}'.format(self.state))

      return self.state
    except IndexError:
      self.Error(
          u'Tried to pop the state but failed - possible recursion error')

  def PushBack(self, string='', **_):
    """Pushes the match back on the stream.

    Args:
      string: Optional string to put back in front of the buffer. The
              default is an empty string.
    """
    # Slicing with [:-0] would wipe the entire processed buffer, hence
    # there is nothing to do for an empty string.
    if not string:
      return

    self.buffer = string + self.buffer
    self.processed_buffer = self.processed_buffer[:-len(string)]

  def Close(self):
    """A convenience function to force us to parse all the data."""
    while self.NextToken():
      if not self.buffer:
        return
class SelfFeederMixIn(Lexer):
  """This mixin is used to make a lexer which feeds itself.

  Note that self.file_object must be the file object we read from.
  """

  # TODO: fix this, file object either needs to be set or not passed here.
  def __init__(self, file_object=None):
    """Initializes the lexer feeder mix-in object.

    Args:
      file_object: Optional file-like object. The default is None.
    """
    super(SelfFeederMixIn, self).__init__()
    self.file_object = file_object

  def NextToken(self):
    """Retrieves the next token, reading more data first when needed.

    Returns:
      The result of Lexer.NextToken(), or None if no data could be read and
      the buffer is empty.
    """
    # If we don't have enough data - feed ourselves: We assume
    # that we must have at least one sector in our buffer.
    needs_data = len(self.buffer) < 512
    if needs_data and self.Feed() == 0 and not self.buffer:
      return None

    return Lexer.NextToken(self)

  def Feed(self, size=512):
    """Feeds data from the file object into the buffer.

    Args:
      size: Optional size of the data to read. The default is 512.

    Returns:
      The length of the data that was read.
    """
    data = self.file_object.read(size)
    Lexer.Feed(self, data)
    data_size = len(data)
    return data_size
class Expression(object):
  """A class representing an expression.

  An expression consists of an attribute, an operator and the arguments
  the operator is applied to.
  """

  attribute = None
  args = None
  operator = None

  # The expected number of args
  number_of_args = 1

  def __init__(self):
    """Initializes the expression object."""
    self.args = []

  def SetAttribute(self, attribute):
    """Sets the attribute."""
    self.attribute = attribute

  def SetOperator(self, operator):
    """Sets the operator."""
    self.operator = operator

  def AddArg(self, arg):
    """Adds a new arg to this expression.

    Args:
      arg: The argument to add (string).

    Returns:
      True if this arg is the last arg, False otherwise.

    Raises:
      ParseError: If there are too many args.
    """
    self.args.append(arg)
    if len(self.args) > self.number_of_args:
      raise ParseError(u'Too many args for this expression.')
    elif len(self.args) == self.number_of_args:
      return True

    return False

  def __str__(self):
    """Returns a string representation of the expression."""
    # The values are converted explicitly since the attribute and operator
    # can be None and args is a list, neither of which supports the 's'
    # format specification on every Python version.
    return 'Expression: ({0!s}) ({1!s}) {2!s}'.format(
        self.attribute, self.operator, self.args)

  # TODO: rename this function to GetTreeAsString or equivalent.
  def PrintTree(self, depth=''):
    """Returns the expression as a string prefixed with the depth.

    Args:
      depth: Optional string that is used as indentation. The default is
             an empty string.

    Returns:
      A Unicode string containing the depth and the expression.
    """
    return u'{0:s} {1!s}'.format(depth, self)

  def Compile(self, unused_filter_implemention):
    """Given a filter implementation, compile this expression.

    Raises:
      NotImplementedError: always, since this needs to be implemented by
                           the sub classes.
    """
    raise NotImplementedError(
        u'{0:s} does not implement Compile.'.format(self.__class__.__name__))
class BinaryExpression(Expression):
  """An expression which takes two other expressions."""

  def __init__(self, operator='', part=None):
    """Initializes the expression object.

    Args:
      operator: Optional string containing the operator. The default is an
                empty string.
      part: Optional first operand of the expression. The default is None.
    """
    # The initializer of the base class resets args, hence it has to run
    # before the optional first operand is stored; otherwise the operand
    # would be discarded.
    super(BinaryExpression, self).__init__()
    self.operator = operator
    if part:
      self.args.append(part)

  def __str__(self):
    """Returns a string representation of the binary expression."""
    # The list of operands is converted explicitly since a list does not
    # support the 's' format specification on every Python version.
    return 'Binary Expression: {0!s} {1!s}'.format(
        self.operator, [str(x) for x in self.args])

  def AddOperands(self, lhs, rhs):
    """Sets the left and right hand side operands.

    Args:
      lhs: The left hand side operand (instance of Expression).
      rhs: The right hand side operand (instance of Expression).

    Raises:
      ParseError: If one of the operands is not an expression.
    """
    if isinstance(lhs, Expression) and isinstance(rhs, Expression):
      self.args = [lhs, rhs]
    else:
      # The operands can be of any type here, hence the explicit string
      # conversion.
      raise ParseError(u'Expected expression, got {0!s} {1!s} {2!s}'.format(
          lhs, self.operator, rhs))

  # TODO: rename this function to GetTreeAsString or equivalent.
  def PrintTree(self, depth=''):
    """Returns the operator and the trees of the operands as a string.

    Args:
      depth: Optional string that is used as indentation. The default is
             an empty string.

    Returns:
      A Unicode string containing the tree of the binary expression.
    """
    result = u'{0:s}{1:s}\n'.format(depth, self.operator)
    for part in self.args:
      result += u'{0:s}-{1:s}\n'.format(depth, part.PrintTree(depth + ' '))

    return result

  def Compile(self, filter_implemention):
    """Compiles the binary expression into a filter object.

    Args:
      filter_implemention: The filter implementation, which provides the
                           AndFilter and OrFilter callables.

    Returns:
      The result of calling AndFilter or OrFilter with the compiled
      operands.

    Raises:
      ParseError: If the operator is not a supported binary operator.
    """
    operator = self.operator.lower()
    if operator in ('and', '&&'):
      method = 'AndFilter'
    elif operator in ('or', '||'):
      method = 'OrFilter'
    else:
      raise ParseError(u'Invalid binary operator {0:s}'.format(operator))

    args = [x.Compile(filter_implemention) for x in self.args]
    return getattr(filter_implemention, method)(*args)
class IdentityExpression(Expression):
  """An expression which always evaluates to True."""

  def Compile(self, filter_implemention):
    """Compiles the expression into the identity filter.

    Args:
      filter_implemention: The filter implementation, which provides the
                           IdentityFilter callable.

    Returns:
      The result of calling IdentityFilter of the filter implementation.
    """
    identity_filter = filter_implemention.IdentityFilter()
    return identity_filter
class SearchParser(Lexer):
"""This parser can parse the mini query language and build an AST.
Examples of valid syntax:
filename contains "foo" and (size > 100k or date before "2011-10")
date between 2011 and 2010
files older than 1 year
"""
expression_cls = Expression
binary_expression_cls = BinaryExpression
tokens = [
# Double quoted string
Token('STRING', '"', 'PopState,StringFinish', None),
Token('STRING', r'\\(.)', 'StringEscape', None),
Token('STRING', r'[^\\"]+', 'StringInsert', None),
# Single quoted string
Token('SQ_STRING', '\'', 'PopState,StringFinish', None),
Token('SQ_STRING', r'\\(.)', 'StringEscape', None),
Token('SQ_STRING', r'[^\\\']+', 'StringInsert', None),
# TODO: Implement a unary not operator.
# The first thing we see in the initial state takes up to the ATTRIBUTE
Token('INITIAL', r'(and|or|\&\&|\|\|)', 'BinaryOperator', None),
Token('INITIAL', r'[^\s\(\)]', 'PushState,PushBack', 'ATTRIBUTE'),
Token('INITIAL', r'\(', 'BracketOpen', None),
Token('INITIAL', r'\)', 'BracketClose', None),
Token('ATTRIBUTE', r'[\w._0-9]+', 'StoreAttribute', 'OPERATOR'),
Token('OPERATOR', r'[a-z0-9<>=\-\+\!\^\&%]+', 'StoreOperator',
'ARG_LIST'),
Token('OPERATOR', r'(!=|[<>=])', 'StoreSpecialOperator', 'ARG_LIST'),
Token('ARG_LIST', r'[^\s\'"]+', 'InsertArg', None),
# Start a string.
Token('.', '"', 'PushState,StringStart', 'STRING'),
Token('.', '\'', 'PushState,StringStart', 'SQ_STRING'),
# Skip whitespace.
Token('.', r'\s+', None, None),
]
def __init__(self, data):
  """Initializes the search parser object.

  Args:
    data: The filter string that is parsed.
  """
  self.filter_string = data

  # The token stack
  self.stack = []

  # Holds the expression that is currently being built.
  self.current_expression = self.expression_cls()

  Lexer.__init__(self, data)
def BinaryOperator(self, string=None, **_):
  """Pushes a new binary expression for the operator on the stack.

  Args:
    string: Optional string containing the operator that matched. The
            default is None.
  """
  binary_expression = self.binary_expression_cls(string)
  self.stack.append(binary_expression)
def BracketOpen(self, **_):
  """Pushes an open bracket marker on the stack."""
  open_bracket = '('
  self.stack.append(open_bracket)
def BracketClose(self, **_):
"""Close the bracket."""
self.stack.append(')')
def StringStart(self, **_):
"""Initialize the string."""
self.string = ''
def StringEscape(self, string, match, **_):
"""Escape backslashes found inside a string quote.
Backslashes followed by anything other than ['"rnbt] will just be included
in the string.
Args:
string: The string that matched.
match: The match object (m.group(1) is the escaped code)
"""
if match.group(1) in '\'"rnbt':
self.string += string.decode('string_escape')
else:
self.string += string
def StringInsert(self, string='', **_):
"""Add to the string."""
self.string += string
def StringFinish(self, **_):
"""Finish the string operation."""
if self.state == 'ATTRIBUTE':
return self.StoreAttribute(string=self.string)
elif self.state == 'ARG_LIST':
return self.InsertArg(string=self.string)
def StoreAttribute(self, string='', **_):
"""Store the attribute."""
logging.debug(u'Storing attribute {0:s}'.format(repr(string)))
# TODO: Update the expected number_of_args
try:
self.current_expression.SetAttribute(string)
except AttributeError:
raise ParseError(u'Invalid attribute \'{0:s}\''.format(string))
return 'OPERATOR'
def StoreOperator(self, string='', **_):
"""Store the operator."""
logging.debug(u'Storing operator {0:s}'.format(repr(string)))
self.current_expression.SetOperator(string)
def InsertArg(self, string='', **_):
"""Insert an arg to the current expression."""
logging.debug(u'Storing Argument {0:s}'.format(string))
# This expression is complete
if self.current_expression.AddArg(string):
self.stack.append(self.current_expression)
self.current_expression = self.expression_cls()
return self.PopState()
def _CombineBinaryExpressions(self, operator):
"""Combine binary expressions."""
for i in range(1, len(self.stack)-1):
item = self.stack[i]
if (isinstance(item, BinaryExpression) and item.operator == operator and
isinstance(self.stack[i-1], Expression) and
isinstance(self.stack[i+1], Expression)):
lhs = self.stack[i-1]
rhs = self.stack[i+1]
self.stack[i].AddOperands(lhs, rhs)
self.stack[i-1] = None
self.stack[i+1] = None
self.stack = filter(None, self.stack)
def _CombineParenthesis(self):
"""Combine parenthesis."""
for i in range(len(self.stack)-2):
if (self.stack[i] == '(' and self.stack[i+2] == ')' and
isinstance(self.stack[i+1], Expression)):
self.stack[i] = None
self.stack[i+2] = None
self.stack = filter(None, self.stack)
def Reduce(self):
"""Reduce the token stack into an AST."""
# Check for sanity
if self.state != 'INITIAL':
self.Error(u'Premature end of expression')
length = len(self.stack)
while length > 1:
# Precendence order
self._CombineParenthesis()
self._CombineBinaryExpressions('and')
self._CombineBinaryExpressions('or')
# No change
if len(self.stack) == length:
break
length = len(self.stack)
if length != 1:
self.Error(u'Illegal query expression')
return self.stack[0]
def Error(self, message=None, unused_weight=1):
"""Raise an error message."""
raise ParseError(u'{0:s} in position {1:s}: {2:s} <----> {3:s} )'.format(
message, len(self.processed_buffer), self.processed_buffer,
self.buffer))
def Parse(self):
"""Parse."""
if not self.filter_string:
return IdentityExpression()
self.Close()
return self.Reduce()
+20
View File
@@ -0,0 +1,20 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains few class variables that define various limits."""
# NOTE(review): despite the name, 2**64-1 is the largest *unsigned* 64-bit
# value; the largest signed 64-bit value would be 2**63-1. Confirm what the
# callers expect before changing either the name or the value.
MAX_INT64 = 2**64-1
+925
View File
@@ -0,0 +1,925 @@
#!/usr/bin/env python
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Originally copied from the GRR project:
# http://code.google.com/p/grr/source/browse/lib/objectfilter.py
# Copied on 11/15/2012
# Minor changes made to make it work in plaso.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes to perform filtering of objects based on their data members.
Given a list of objects and a textual filter expression, these classes allow
you to determine which objects match the filter. The system has two main
pieces: A parser for the supported grammar and a filter implementation.
Given any complying user-supplied grammar, it is parsed with a custom lexer
based on GRR's lexer and then compiled into an actual implementation by using
the filter implementation. A filter implementation simply provides actual
implementations for the primitives required to perform filtering. The compiled
result is always a class supporting the Filter interface.
If we define a class called Car such as:
class Car(object):
def __init__(self, code, color="white", doors=3):
self.code = code
self.color = color
self.doors = doors
And we have two instances:
ford_ka = Car("FORDKA1", color="grey")
toyota_corolla = Car("COROLLA1", color="white", doors=5)
fleet = [ford_ka, toyota_corolla]
We want to find cars that are grey and have 3 or more doors. We could filter
our fleet like this:
criteria = "(color is grey) and (doors >= 3)"
parser = Parser(criteria).Parse()
compiled_filter = parser.Compile(LowercaseAttributeFilterImplementation)
for car in fleet:
if compiled_filter.Matches(car):
print "Car %s matches the supplied filter." % car.code
The filter expression contains two subexpressions joined by an AND operator:
"color is grey" and "doors >= 3"
This means we want to search for objects matching these two subexpressions.
Let's analyze the first one in depth "color is grey":
"color": the left operand specifies a search path to look for the data. This
tells our filtering system to look for the color property on passed objects.
"is": the operator. Values retrieved for the "color" property will be checked
against the right operand to see if they are equal.
"grey": the right operand. It specifies an explicit value to check for.
So each time an object is passed through the filter, it will expand the value
of the color data member, and compare its value against "grey".
Because data members of objects are often not simple datatypes but other
objects, the system allows you to reference data members within other data
members by separating each by a dot. Let's see an example:
Let's add a more complex Car class with default tyre data:
class CarWithTyres(Car):
def __init__(self, code, tyres=None, color="white", doors=3):
super(CarWithTyres, self).__init__(code, color, doors)
self.tyres = tyres or Tyre("Pirelli", "PZERO")
class Tyre(object):
def __init__(self, brand, code):
self.brand = brand
self.code = code
And two new instances:
ford_ka = CarWithTyres("FORDKA", color="grey", tyres=Tyre("AVON", "ZT5"))
toyota_corolla = Car("COROLLA1", color="white", doors=5)
fleet = [ford_ka, toyota_corolla]
To filter a car based on the tyre brand, we would use a search path of
"tyres.brand".
Because the filter implementation provides the actual classes that perform
handling of the search paths, operators, etc. customizing the behaviour of the
filter is easy. Three basic filter implementations are given:
BaseFilterImplementation: search path expansion is done on attribute names
as provided (case-sensitive).
LowercaseAttributeFilterImplementation: search path expansion is done on the lowercased
attribute name, so that it only accesses attributes, not methods.
DictFilterImplementation: search path expansion is done on dictionary access
to the given object. So "a.b" expands the object obj to obj["a"]["b"]
"""
import abc
import binascii
import logging
import re
from plaso.lib import lexer
from plaso.lib import utils
class Error(Exception):
  """Base class for the exceptions defined in this module."""
class MalformedQueryError(Error):
  """Signals that the provided filter query is malformed."""
class ParseError(Error):
  """Signals that a textual query could not be parsed or compiled."""
class InvalidNumberOfOperands(Error):
  """Signals that an operator received the wrong number of operands."""
class Filter(object):
  """Base class for every filter.

  Attributes:
    args: List with the arguments given to the filter (empty when none).
    value_expander: Instance of value_expander_cls, or None when no value
        expander class was supplied.
    value_expander_cls: The value expander class given to the constructor.
  """

  def __init__(self, arguments=None, value_expander=None):
    """Constructor.

    Args:
      arguments: Arguments to the filter.
      value_expander: A callable that will be used to expand values for the
          objects passed to this filter. Implementations expanders are
          provided by subclassing ValueExpander.

    Raises:
      Error: If the given value_expander is not a subclass of ValueExpander
    """
    self.value_expander = None
    self.value_expander_cls = value_expander
    if self.value_expander_cls:
      if not issubclass(self.value_expander_cls, ValueExpander):
        # The offending value is a class, not a string, hence !s.
        raise Error(u'{0!s} is not a valid value expander'.format(
            self.value_expander_cls))
      self.value_expander = self.value_expander_cls()
    self.args = arguments or []
    # arguments is a list or None, never a string, hence !s.
    logging.debug(u'Adding {0!s}'.format(arguments))

  @abc.abstractmethod
  def Matches(self, obj):
    """Whether object obj matches this filter."""

  def Filter(self, objects):
    """Returns a list of objects that pass the filter.

    Args:
      objects: Iterable with the objects to test.

    Returns:
      A list with the objects for which Matches() is true.
    """
    return [obj for obj in objects if self.Matches(obj)]

  def __str__(self):
    """Returns the class name followed by the stringified arguments."""
    return '{0:s}({1:s})'.format(
        self.__class__.__name__, ', '.join([str(arg) for arg in self.args]))
class AndFilter(Filter):
  """Boolean AND over the Filter instances given as arguments.

  With no arguments every object passes.
  """

  def Matches(self, obj):
    """Returns True unless one of the child filters rejects obj."""
    return all(child.Matches(obj) for child in self.args)
class OrFilter(Filter):
  """Boolean OR over the Filter instances given as arguments.

  With no arguments every object passes.
  """

  def Matches(self, obj):
    """Returns True when there are no children or any child accepts obj."""
    return not self.args or any(child.Matches(obj) for child in self.args)
# pylint: disable=abstract-method
class Operator(Filter):
  """Base class for all operators.

  Adds nothing to Filter; it serves as the common ancestor of the operators.
  """
class IdentityFilter(Operator):
  """Filter that matches every object."""

  def Matches(self, _):
    """Returns True regardless of the object passed in."""
    return True
class UnaryOperator(Operator):
  """Base class for operators that take a single operand."""

  def __init__(self, operand, **kwargs):
    """Constructor.

    Args:
      operand: The only operand; it is passed on as a one-item argument list.
      **kwargs: Passed through to the parent constructor.

    Raises:
      InvalidNumberOfOperands: If the stored argument list does not hold
          exactly one item.
    """
    super(UnaryOperator, self).__init__(arguments=[operand], **kwargs)
    operand_count = len(self.args)
    if operand_count == 1:
      return

    raise InvalidNumberOfOperands(
        u'Only one operand is accepted by {0:s}. Received {1:d}.'.format(
            self.__class__.__name__, operand_count))
class BinaryOperator(Operator):
  """Base class for binary operators.

  The left operand is always a path into the object which will be expanded for
  values. The right operand is a value defined at initialization and is stored
  at self.right_operand.
  """

  def __init__(self, arguments=None, **kwargs):
    """Constructor.

    Args:
      arguments: List with exactly two items: the left and right operand.
      **kwargs: Passed through to the parent constructor.

    Raises:
      InvalidNumberOfOperands: If arguments does not hold exactly two items.
    """
    super(BinaryOperator, self).__init__(arguments=arguments, **kwargs)
    if len(self.args) != 2:
      # The count is an integer, so it needs the d format code; with the s
      # code the format call itself fails and masks the intended exception.
      raise InvalidNumberOfOperands(
          u'Only two operands are accepted by {0:s}. Received {1:d}.'.format(
              self.__class__.__name__, len(self.args)))

    self.left_operand = self.args[0]
    self.right_operand = self.args[1]
class GenericBinaryOperator(BinaryOperator):
  """Binary operator skeleton: subclasses only need to supply Operation()."""

  def __init__(self, **kwargs):
    """Constructor; keyword arguments go to the parent constructor."""
    super(GenericBinaryOperator, self).__init__(**kwargs)
    # Value Matches() returns on a successful comparison.
    self.bool_value = True

  def FlipBool(self):
    """Inverts the result reported by Matches()."""
    logging.debug(u'Negative matching.')
    self.bool_value = not self.bool_value

  def Operation(self, x, y):
    """Performs the operation between two values."""

  def Operate(self, values):
    """Returns True as soon as one of the values satisfies Operation().

    Values for which Operation() raises ValueError or TypeError are skipped.
    """
    for candidate in values:
      try:
        satisfied = self.Operation(candidate, self.right_operand)
      except (ValueError, TypeError):
        continue

      if satisfied:
        return True

    return False

  def Matches(self, obj):
    """Expands the left operand on obj and compares the resulting values."""
    values = self.value_expander.Expand(obj, self.left_operand)
    if not values or not self.Operate(values):
      return not self.bool_value
    return self.bool_value
class Equals(GenericBinaryOperator):
  """Matches objects when the right operand equals the expanded value."""

  def Operation(self, x, y):
    """Returns the result of x == y."""
    return x == y
class NotEquals(Equals):
  """Matches when the right operand isn't equal to the expanded value.

  Reuses the Equals comparison but starts with bool_value set to False.
  """

  def __init__(self, **kwargs):
    """Constructor; keyword arguments go to the parent constructor."""
    super(NotEquals, self).__init__(**kwargs)
    self.bool_value = False
class Less(GenericBinaryOperator):
  """Whether the expanded value < right_operand."""

  def Operation(self, x, y):
    """Returns the result of x < y."""
    return x < y
class LessEqual(GenericBinaryOperator):
  """Whether the expanded value <= right_operand."""

  def Operation(self, x, y):
    """Returns the result of x <= y."""
    return x <= y
class Greater(GenericBinaryOperator):
  """Whether the expanded value > right_operand."""

  def Operation(self, x, y):
    """Returns the result of x > y."""
    return x > y
class GreaterEqual(GenericBinaryOperator):
  """Whether the expanded value >= right_operand."""

  def Operation(self, x, y):
    """Returns the result of x >= y."""
    return x >= y
class Contains(GenericBinaryOperator):
  """Whether the right operand is contained in the value."""

  def Operation(self, x, y):
    """Checks whether y is contained in x.

    Args:
      x: The expanded value.
      y: The right operand.

    Returns:
      For two strings, whether y occurs in x ignoring case; otherwise the
      result of "y in x".
    """
    # Only lowercase when both sides are strings. A non-string right operand
    # has no lower() method; letting it fall through to the "in" test raises
    # a TypeError for incompatible operands instead of an AttributeError.
    if isinstance(x, basestring) and isinstance(y, basestring):
      return y.lower() in x.lower()
    return y in x
class InSet(GenericBinaryOperator):
  # TODO(user): Change to an N-ary Operator?
  """Whether all values are contained within the right operand."""

  def Operation(self, x, y):
    """Whether x, or every item of x, is contained in y."""
    if x in y:
      return True

    # Strings are iterable too, but checking them character by character
    # would be meaningless, so they are rejected at this point.
    if isinstance(x, (basestring, bytes)):
      return False

    try:
      return all(value in y for value in x)
    except TypeError:
      return False
class Regexp(GenericBinaryOperator):
  """Whether the value matches the regexp in the right operand."""

  def __init__(self, *children, **kwargs):
    """Constructor; compiles the right operand with re.DOTALL.

    Raises:
      ValueError: If the right operand is not a valid regular expression.
    """
    super(Regexp, self).__init__(*children, **kwargs)
    # Note that right_operand is not necessarily a string.
    logging.debug(u'Compiled: {0!s}'.format(self.right_operand))
    try:
      pattern = utils.GetUnicodeString(self.right_operand)
      self.compiled_re = re.compile(pattern, re.DOTALL)
    except re.error:
      raise ValueError(u'Regular expression "{0!s}" is malformed.'.format(
          self.right_operand))

  def Operation(self, x, unused_y):
    """Returns True when the compiled expression is found in x.

    A TypeError raised while searching counts as no match.
    """
    try:
      return bool(self.compiled_re.search(utils.GetUnicodeString(x)))
    except TypeError:
      return False
class RegexpInsensitive(Regexp):
  """Whether the value matches the regexp, ignoring case."""

  def __init__(self, *children, **kwargs):
    """Constructor; recompiles the right operand with re.I and re.DOTALL.

    Raises:
      ValueError: If the right operand is not a valid regular expression.
    """
    super(RegexpInsensitive, self).__init__(*children, **kwargs)
    # Note that right_operand is not necessarily a string.
    logging.debug(u'Compiled: {0!s}'.format(self.right_operand))
    flags = re.I | re.DOTALL
    try:
      pattern = utils.GetUnicodeString(self.right_operand)
      self.compiled_re = re.compile(pattern, flags)
    except re.error:
      raise ValueError(u'Regular expression "{0!s}" is malformed.'.format(
          self.right_operand))
class Context(Operator):
  """Restricts the child operators to a specific context within the object.

  Solves the context problem. The context problem is the following:
  Suppose you store a list of loaded DLLs within a process. Suppose that for
  each of these DLLs you store the number of imported functions and each of the
  imported functions name.

  Imagine that a malicious DLL is injected into processes and its indicators are
  that it only imports one function and that it is RegQueryValueEx. You'd write
  your indicator like this:

  AndOperator(
    Equal("ImportedDLLs.ImpFunctions.Name", "RegQueryValueEx"),
    Equal("ImportedDLLs.NumImpFunctions", "1")
    )

  Now imagine you have these two processes on a given system.

  Process1
  +[0]__ImportedDlls
        +[0]__Name: "notevil.dll"
        |[0]__ImpFunctions
        |     +[1]__Name: "CreateFileA"
        |[0]__NumImpFunctions: 1
        |
        +[1]__Name: "alsonotevil.dll"
        |[1]__ImpFunctions
        |     +[0]__Name: "RegQueryValueEx"
        |     +[1]__Name: "CreateFileA"
        |[1]__NumImpFunctions: 2

  Process2
  +[0]__ImportedDlls
        +[0]__Name: "evil.dll"
        |[0]__ImpFunctions
        |     +[0]__Name: "RegQueryValueEx"
        |[0]__NumImpFunctions: 1

  Both Process1 and Process2 match your query, as each of the indicators are
  evaluated separately. While you wanted to express "find me processes that
  have a DLL that has both one imported function and RegQueryValueEx is in the
  list of imported functions", your indicator actually means "find processes
  that have at least a DLL with 1 imported functions and at least one DLL that
  imports the RegQueryValueEx function".

  To write such an indicator you need to specify a context of ImportedDLLs for
  these two clauses. Such that you convert your indicator to:

  Context("ImportedDLLs",
          AndOperator(
            Equal("ImpFunctions.Name", "RegQueryValueEx"),
            Equal("NumImpFunctions", "1")
          ))

  Context will execute the filter specified as the second parameter for each of
  the objects under "ImportedDLLs", thus applying the condition per DLL, not per
  object and returning the right result.
  """

  def __init__(self, arguments=None, **kwargs):
    """Constructor.

    Args:
      arguments: List with exactly two items: the context path and the
          condition filter.
      **kwargs: Passed through to the parent constructor.

    Raises:
      InvalidNumberOfOperands: If arguments is missing or does not hold
          exactly two items.
    """
    # arguments defaults to None, for which len() would raise a TypeError
    # rather than the documented exception.
    if arguments is None or len(arguments) != 2:
      raise InvalidNumberOfOperands(u'Context accepts only 2 operands.')
    super(Context, self).__init__(arguments=arguments, **kwargs)
    self.context, self.condition = self.args

  def Matches(self, obj):
    """Returns True if any object under the context matches the condition."""
    for object_list in self.value_expander.Expand(obj, self.context):
      for sub_object in object_list:
        if self.condition.Matches(sub_object):
          return True
    return False
# Maps the operator text accepted in a query to the class implementing it.
# Several spellings ('equals', 'is', '==') share one implementation.
OP2FN = {
    'equals': Equals,
    'is': Equals,
    '==': Equals,
    '!=': NotEquals,
    'contains': Contains,
    '>': Greater,
    '>=': GreaterEqual,
    '<': Less,
    '<=': LessEqual,
    'inset': InSet,
    'regexp': Regexp,
    'iregexp': RegexpInsensitive}
class ValueExpander(object):
  """Encapsulates the logic to expand values available in an object.

  Once instantiated and called, this class returns all the values that follow a
  given field path.

  Subclasses must provide _GetValue(); the version here raises
  NotImplementedError.
  """

  # Separator used to split a path given as a single string.
  FIELD_SEPARATOR = '.'

  def _GetAttributeName(self, path):
    """Returns the attribute name to fetch given a path."""
    return path[0]

  def _GetValue(self, unused_obj, unused_attr_name):
    """Returns the value of the attribute attr_name."""
    raise NotImplementedError()

  def _AtLeaf(self, attr_value):
    """Called when at a leaf value. Should yield a value."""
    yield attr_value

  def _AtNonLeaf(self, attr_value, path):
    """Called when at a non-leaf value. Should recurse and yield values.

    Args:
      attr_value: The value found for the first element of path.
      path: The path being expanded; path[1:] is what remains to expand.

    Yields:
      attr_value itself when it is a dictionary, otherwise the values found
      by expanding the remaining path.
    """
    try:
      # Check first for iterables
      # If it's a dictionary, we yield it
      if isinstance(attr_value, dict):
        yield attr_value
      else:
        # If it's an iterable, we recurse on each value.
        for sub_obj in attr_value:
          for value in self.Expand(sub_obj, path[1:]):
            yield value
    except TypeError:  # This is then not iterable, we recurse with the value
      for value in self.Expand(attr_value, path[1:]):
        yield value

  def Expand(self, obj, path):
    """Returns a list of all the values for the given path in the object obj.

    Given a path such as ["sub1", "sub2"] it returns all the values available
    in obj.sub1.sub2 as a list. sub1 and sub2 must be data attributes or
    properties.

    If sub1 returns a list of objects, or a generator, Expand aggregates the
    values for the remaining path for each of the objects, thus returning a
    list of all the values under the given path for the input object.

    Args:
      obj: An object that will be traversed for the given path
      path: A list of strings, or a single string that is split on
          FIELD_SEPARATOR.

    Yields:
      The values once the object is traversed. Nothing is yielded when the
      value for the first path element is None.
    """
    if isinstance(path, basestring):
      path = path.split(self.FIELD_SEPARATOR)

    attr_name = self._GetAttributeName(path)
    attr_value = self._GetValue(obj, attr_name)
    if attr_value is None:
      return

    if len(path) == 1:
      for value in self._AtLeaf(attr_value):
        yield value
    else:
      for value in self._AtNonLeaf(attr_value, path):
        yield value
class AttributeValueExpander(ValueExpander):
  """An expander that gives values based on object attribute names."""

  def _GetValue(self, obj, attr_name):
    """Returns the attribute attr_name of obj, or None when it is missing."""
    return getattr(obj, attr_name, None)
class LowercaseAttributeValueExpander(AttributeValueExpander):
  """An expander that lowercases all attribute names before access."""

  def _GetAttributeName(self, path):
    """Returns the first element of path in lowercase."""
    return path[0].lower()
class DictValueExpander(ValueExpander):
  """An expander that gets values from dictionary access to the object."""

  def _GetValue(self, obj, attr_name):
    """Returns obj.get(attr_name), which is None when the key is missing."""
    return obj.get(attr_name, None)
class BasicExpression(lexer.Expression):
  """Expression made of an attribute, an operator and its arguments."""

  def __init__(self):
    """Constructor."""
    super(BasicExpression, self).__init__()
    # False once the expression has been negated through FlipBool().
    self.bool_value = True

  def FlipBool(self):
    """Inverts the boolean sense of the expression."""
    self.bool_value = not self.bool_value

  def Compile(self, filter_implementation):
    """Compiles the expression into an operator object.

    Args:
      filter_implementation: Object providing the OPS and FILTERS mappings.

    Returns:
      An instance of the operator class registered for this operator.

    Raises:
      ParseError: If the operator is not present in OPS.
    """
    operator_cls = filter_implementation.OPS.get(self.operator.lower(), None)
    if not operator_cls:
      raise ParseError(u'Unknown operator {0:s} provided.'.format(
          self.operator))

    # The attribute is always the first (left) operand.
    operands = [self.attribute] + list(self.args)
    compiled = operator_cls(
        arguments=operands,
        value_expander=filter_implementation.FILTERS['ValueExpander'])

    if not self.bool_value and hasattr(compiled, 'FlipBool'):
      compiled.FlipBool()

    return compiled
class ContextExpression(lexer.Expression):
  """Represents the context operator."""

  def __init__(self, attribute="", part=None):
    """Constructor.

    Args:
      attribute: The context path.
      part: Optional expression to evaluate within the context.
    """
    # Run the base initializer first so that anything it sets cannot
    # overwrite the values supplied here.
    # NOTE(review): lexer.Expression.__init__ is not visible from this
    # module; confirm it does not depend on these attributes being set.
    super(ContextExpression, self).__init__()
    self.attribute = attribute
    self.args = []
    if part:
      self.args.append(part)

  def __str__(self):
    """Returns the attribute together with the stringified arguments."""
    # The second value is a list, which needs !s rather than the s code.
    return 'Context({0!s} {1!s})'.format(
        self.attribute, [str(x) for x in self.args])

  def SetExpression(self, expression):
    """Set the expression.

    Args:
      expression: The lexer.Expression to evaluate within the context.

    Raises:
      ParseError: If expression is not a lexer.Expression.
    """
    if isinstance(expression, lexer.Expression):
      self.args = [expression]
    else:
      # The rejected value is not necessarily a string, hence !s.
      raise ParseError(u'Expected expression, got {0!s}.'.format(expression))

  def Compile(self, filter_implementation):
    """Compile the expression.

    Args:
      filter_implementation: Object providing the FILTERS mapping.

    Returns:
      An instance of the class registered under FILTERS['Context'].
    """
    arguments = [self.attribute]
    for arg in self.args:
      arguments.append(arg.Compile(filter_implementation))
    expander = filter_implementation.FILTERS['ValueExpander']
    context_cls = filter_implementation.FILTERS['Context']
    return context_cls(arguments=arguments,
                       value_expander=expander)
class BinaryExpression(lexer.BinaryExpression):
  """Binary expression that compiles into an AND or OR filter."""

  def Compile(self, filter_implementation):
    """Compile the binary expression into a filter object.

    Args:
      filter_implementation: Object providing the FILTERS mapping.

    Returns:
      An instance of the AndFilter or OrFilter entry of FILTERS.

    Raises:
      ParseError: If the operator is not one of and, &&, or, ||.
    """
    filter_names = {
        'and': 'AndFilter',
        '&&': 'AndFilter',
        'or': 'OrFilter',
        '||': 'OrFilter'}

    operator = self.operator.lower()
    method = filter_names.get(operator)
    if method is None:
      raise ParseError(u'Invalid binary operator {0:s}.'.format(operator))

    operands = [x.Compile(filter_implementation) for x in self.args]
    filter_cls = filter_implementation.FILTERS[method]
    return filter_cls(arguments=operands)
class Parser(lexer.SearchParser):
  """Parses and generates an AST for a query written in the described language.

  Examples of valid syntax:
    size is 40
    (name contains "Program Files" AND hash.md5 is "123abc")
    @imported_modules (num_symbols = 14 AND symbol.name is "FindWindow")
  """

  expression_cls = BasicExpression
  binary_expression_cls = BinaryExpression
  context_cls = ContextExpression

  tokens = [
      # Operators and related tokens
      lexer.Token('INITIAL', r'\@[\w._0-9]+',
                  'ContextOperator,PushState', 'CONTEXTOPEN'),
      lexer.Token('INITIAL', r'[^\s\(\)]', 'PushState,PushBack', 'ATTRIBUTE'),
      lexer.Token('INITIAL', r'\(', 'PushState,BracketOpen', None),
      lexer.Token('INITIAL', r'\)', 'BracketClose', 'BINARY'),

      # Context
      lexer.Token('CONTEXTOPEN', r'\(', 'BracketOpen', 'INITIAL'),

      # Double quoted string
      lexer.Token('STRING', '"', 'PopState,StringFinish', None),
      lexer.Token('STRING', r'\\x(..)', 'HexEscape', None),
      lexer.Token('STRING', r'\\(.)', 'StringEscape', None),
      lexer.Token('STRING', r'[^\\"]+', 'StringInsert', None),

      # Single quoted string
      lexer.Token('SQ_STRING', '\'', 'PopState,StringFinish', None),
      lexer.Token('SQ_STRING', r'\\x(..)', 'HexEscape', None),
      lexer.Token('SQ_STRING', r'\\(.)', 'StringEscape', None),
      lexer.Token('SQ_STRING', r'[^\\\']+', 'StringInsert', None),

      # Basic expression
      lexer.Token('ATTRIBUTE', r'[\w._0-9]+', 'StoreAttribute', 'OPERATOR'),
      lexer.Token('OPERATOR', r'not ', 'FlipLogic', None),
      lexer.Token('OPERATOR', r'(\w+|[<>!=]=?)', 'StoreOperator', 'CHECKNOT'),
      lexer.Token('CHECKNOT', r'not', 'FlipLogic', 'ARG'),
      lexer.Token('CHECKNOT', r'\s+', None, None),
      lexer.Token('CHECKNOT', r'([^not])', 'PushBack', 'ARG'),
      lexer.Token('ARG', r'(\d+\.\d+)', 'InsertFloatArg', 'ARG'),
      # Base16 literals: the digits a-f have to be accepted as well, or a
      # value such as 0x1f is cut off after its leading decimal digits.
      lexer.Token('ARG', r'(0x[0-9a-fA-F]+)', 'InsertInt16Arg', 'ARG'),
      lexer.Token('ARG', r'(\d+)', 'InsertIntArg', 'ARG'),
      lexer.Token('ARG', '"', 'PushState,StringStart', 'STRING'),
      lexer.Token('ARG', '\'', 'PushState,StringStart', 'SQ_STRING'),

      # When the last parameter from arg_list has been pushed

      # State where binary operators are supported (AND, OR)
      lexer.Token('BINARY', r'(?i)(and|or|\&\&|\|\|)',
                  'BinaryOperator', 'INITIAL'),
      # - We can also skip spaces
      lexer.Token('BINARY', r'\s+', None, None),
      # - But if it's not "and" or just spaces we have to go back
      lexer.Token('BINARY', '.', 'PushBack,PopState', None),

      # Skip whitespace.
      lexer.Token('.', r'\s+', None, None),
  ]

  def StoreAttribute(self, string='', **kwargs):
    """Stores the attribute and clears the negation marker.

    Args:
      string: The attribute name.
      **kwargs: Passed through to the parent implementation.
    """
    self.flipped = False
    super(Parser, self).StoreAttribute(string, **kwargs)

  def FlipAllowed(self):
    """Raise an error if the not keyword is used where it is not allowed.

    Raises:
      ParseError: If no attribute has been stored yet, or if the expression
          was negated and its operator is not one of is, contains, inset,
          equals.
    """
    if not hasattr(self, 'flipped'):
      raise ParseError(u'Not defined.')

    if not self.flipped:
      return

    if self.current_expression.operator:
      if not self.current_expression.operator.lower() in (
          'is', 'contains', 'inset', 'equals'):
        raise ParseError(
            u'Keyword \'not\' does not work against operator: {0:s}'.format(
                self.current_expression.operator))

  def FlipLogic(self, **unused_kwargs):
    """Flip the boolean logic of the expression.

    If an expression is configured to return True when the condition
    is met this logic will flip that to False, and vice versa.

    Raises:
      ParseError: If the expression was already negated, already has
          arguments, or the negation is not allowed for its operator.
    """
    if hasattr(self, 'flipped') and self.flipped:
      raise ParseError(u'The operator \'not\' can only be expressed once.')

    if self.current_expression.args:
      raise ParseError(
          u'Unable to place the keyword \'not\' after an argument.')

    self.flipped = True

    # Check if this flip operation should be allowed.
    self.FlipAllowed()

    if hasattr(self.current_expression, 'FlipBool'):
      self.current_expression.FlipBool()
      logging.debug(u'Negative matching [flipping boolean logic].')
    else:
      logging.warning(
          u'Unable to perform a negative match, issuing a positive one.')

  def InsertArg(self, string='', **unused_kwargs):
    """Insert an arg to the current expression.

    Args:
      string: The argument value.

    Returns:
      The string 'BINARY' when the expression is complete, None otherwise.
    """
    # Note that "string" is not necessarily of type string.
    logging.debug(u'Storing argument: {0!s}'.format(string))

    # Check if this flip operation should be allowed.
    self.FlipAllowed()

    # This expression is complete
    if self.current_expression.AddArg(string):
      self.stack.append(self.current_expression)
      self.current_expression = self.expression_cls()
      # We go to the BINARY state, to find if there's an AND or OR operator
      return 'BINARY'

  def InsertFloatArg(self, string='', **unused_kwargs):
    """Inserts a Float argument.

    Raises:
      ParseError: If string cannot be converted to a float.
    """
    try:
      float_value = float(string)
    except (TypeError, ValueError):
      raise ParseError(u'{0:s} is not a valid float.'.format(string))
    return self.InsertArg(float_value)

  def InsertIntArg(self, string='', **unused_kwargs):
    """Inserts an Integer argument.

    Raises:
      ParseError: If string cannot be converted to an integer.
    """
    try:
      int_value = int(string)
    except (TypeError, ValueError):
      raise ParseError(u'{0:s} is not a valid integer.'.format(string))
    return self.InsertArg(int_value)

  def InsertInt16Arg(self, string='', **unused_kwargs):
    """Inserts an Integer in base16 argument.

    Raises:
      ParseError: If string cannot be converted to a base16 integer.
    """
    try:
      int_value = int(string, 16)
    except (TypeError, ValueError):
      raise ParseError(u'{0:s} is not a valid base16 integer.'.format(string))
    return self.InsertArg(int_value)

  def StringFinish(self, **unused_kwargs):
    """Hands the collected quoted string to the attribute or argument handler.

    Returns:
      The value returned by StoreAttribute() or InsertArg(), depending on
      the current state; None in any other state.
    """
    if self.state == 'ATTRIBUTE':
      return self.StoreAttribute(string=self.string)

    elif self.state == 'ARG':
      return self.InsertArg(string=self.string)

  def StringEscape(self, string, match, **unused_kwargs):
    """Escape backslashes found inside a string quote.

    A backslash followed by anything other than another backslash or one of
    the characters '"rnbt.ws raises an error.

    Args:
      string: The string that matched.
      match: The match object (m.group(1) is the escaped code)

    Raises:
      ParseError: When the escaped character is not one of those listed.
    """
    if match.group(1) in '\\\'"rnbt\\.ws':
      self.string += string.decode('string_escape')
    else:
      raise ParseError(u'Invalid escape character {0:s}.'.format(string))

  def HexEscape(self, string, match, **unused_kwargs):
    """Converts a hex escaped string.

    Args:
      string: The string that matched.
      match: The match object (m.group(1) holds the two escaped characters).

    Raises:
      ParseError: If unhexlify rejects the escaped characters.
    """
    logging.debug(u'HexEscape matched {0:s}.'.format(string))
    hex_string = match.group(1)
    try:
      self.string += binascii.unhexlify(hex_string)
    except TypeError:
      raise ParseError(u'Invalid hex escape {0:s}.'.format(string))

  def ContextOperator(self, string='', **unused_kwargs):
    """Pushes a context expression onto the stack.

    Args:
      string: The matched text; its first character (the @ sign) is dropped.
    """
    self.stack.append(self.context_cls(string[1:]))

  def Reduce(self):
    """Reduce the token stack into an AST.

    Returns:
      The single item left on the stack.

    Raises:
      ParseError: If parsing stopped outside the INITIAL and BINARY states,
          or the stack cannot be reduced to a single item.
    """
    # Check for sanity
    if self.state != 'INITIAL' and self.state != 'BINARY':
      self.Error(u'Premature end of expression')

    length = len(self.stack)
    while length > 1:
      # Precedence order. '&&' and '||' are accepted by the BINARY token
      # and by BinaryExpression.Compile, so they are folded together with
      # their spelled-out equivalents; otherwise they would never reduce.
      self._CombineParenthesis()
      self._CombineBinaryExpressions('and')
      self._CombineBinaryExpressions('&&')
      self._CombineBinaryExpressions('or')
      self._CombineBinaryExpressions('||')
      self._CombineContext()

      # No change
      if len(self.stack) == length:
        break
      length = len(self.stack)

    if length != 1:
      self.Error(u'Illegal query expression.')

    return self.stack[0]

  def Error(self, message=None, _=None):
    """Raises a ParseError describing where parsing stopped.

    Args:
      message: Text describing the problem.
      _: Not used.

    Raises:
      ParseError: Always.
    """
    # Note that none of the values necessarily are strings.
    raise ParseError(u'{0!s} in position {1!s}: {2!s} <----> {3!s} )'.format(
        message, len(self.processed_buffer), self.processed_buffer,
        self.buffer))

  def _CombineBinaryExpressions(self, operator):
    """Folds "expression operator expression" triples on the stack.

    Args:
      operator: The operator text to combine, compared ignoring case.
    """
    for i in range(1, len(self.stack)-1):
      item = self.stack[i]
      if (isinstance(item, lexer.BinaryExpression) and
          item.operator.lower() == operator.lower() and
          isinstance(self.stack[i-1], lexer.Expression) and
          isinstance(self.stack[i+1], lexer.Expression)):
        lhs = self.stack[i-1]
        rhs = self.stack[i+1]

        self.stack[i].AddOperands(lhs, rhs)
        # Mark the absorbed operands for removal below.
        self.stack[i-1] = None
        self.stack[i+1] = None

    # A real list is needed: callers take len() of the stack and index it.
    self.stack = [item for item in self.stack if item]

  def _CombineContext(self):
    """Attaches each expression to the context expression preceding it."""
    # Context can merge from item 0
    for i in range(len(self.stack)-1, 0, -1):
      item = self.stack[i-1]
      if (isinstance(item, ContextExpression) and
          isinstance(self.stack[i], lexer.Expression)):
        expression = self.stack[i]
        self.stack[i-1].SetExpression(expression)
        self.stack[i] = None

    # A real list is needed: callers take len() of the stack and index it.
    self.stack = [item for item in self.stack if item]
### FILTER IMPLEMENTATIONS
class BaseFilterImplementation(object):
  """Defines the base implementation of an object filter by its attributes.

  Inherit from this class, switch any of the needed operators and pass it to
  the Compile method of a parsed string to obtain an executable filter.
  """

  # Operator text to operator class.
  OPS = OP2FN
  # Building blocks that the Compile methods look up by name.
  FILTERS = {
      'ValueExpander': AttributeValueExpander,
      'AndFilter': AndFilter,
      'OrFilter': OrFilter,
      'IdentityFilter': IdentityFilter,
      'Context': Context}
class LowercaseAttributeFilterImplementation(BaseFilterImplementation):
  """Does field name access on the lowercase version of names.

  Useful to only access attributes and properties with Google's python naming
  style.
  """

  # Copy of the base mapping with only the value expander replaced.
  FILTERS = dict(
      BaseFilterImplementation.FILTERS,
      ValueExpander=LowercaseAttributeValueExpander)
class DictFilterImplementation(BaseFilterImplementation):
  """Does value fetching by dictionary access on the object."""

  # Copy of the base mapping with only the value expander replaced.
  FILTERS = dict(
      BaseFilterImplementation.FILTERS,
      ValueExpander=DictValueExpander)
+519
View File
@@ -0,0 +1,519 @@
#!/usr/bin/env python
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the object filter."""
import unittest
from plaso.lib import objectfilter
class DummyObject(object):
  """Test object carrying a single attribute chosen at construction time."""

  def __init__(self, key, value):
    """Sets the attribute named key to value."""
    setattr(self, key, value)
class HashObject(object):
  """Test helper that wraps a hash value and compares like that value."""

  def __init__(self, hash_value=None):
    """Remembers the wrapped hash value.

    Args:
      hash_value: The value to wrap. Defaults to None.
    """
    self.value = hash_value

  def __lt__(self, y):
    """Returns the result of comparing the wrapped value with y using <."""
    return self.value < y

  def __eq__(self, y):
    """Returns the result of comparing the wrapped value with y using ==."""
    return self.value == y

  @property
  def md5(self):
    """The wrapped hash value."""
    return self.value
class Dll(object):
  """Test helper modelling a DLL with imported and exported functions."""

  def __init__(self, name, imported_functions=None, exported_functions=None):
    """Stores the name and the function lists together with their sizes.

    Args:
      name: The name of the DLL.
      imported_functions: Optional list of imported function names. A falsy
                          value is replaced by a new empty list.
      exported_functions: Optional list of exported function names. A falsy
                          value is replaced by a new empty list.
    """
    imports = imported_functions or []
    exports = exported_functions or []

    self.name = name
    self._imported_functions = imports
    self.exported_functions = exports
    self.num_imported_functions = len(imports)
    self.num_exported_functions = len(exports)

  @property
  def imported_functions(self):
    """Generator that yields the imported function names one by one."""
    for function_name in self._imported_functions:
      yield function_name
class DummyFile(object):
  """Test fixture that imitates a file object with many kinds of attributes.

  It mixes class attributes, instance attributes, properties returning plain
  values, lists and generators, and a method that must never be called.
  """

  _FILENAME = 'boot.ini'
  ATTR1 = 'Backup'
  ATTR2 = 'Archive'
  HASH1 = '123abc'
  HASH2 = '456def'
  non_callable_leaf = 'yoda'

  def __init__(self):
    """Creates the instance attributes used by the tests."""
    self.imported_dll1 = Dll('a.dll', ['FindWindow', 'CreateFileA'])
    self.imported_dll2 = Dll('b.dll', ['RegQueryValueEx'])
    self.non_callable = HashObject(self.HASH1)
    self.non_callable_repeated = [
        DummyObject('desmond', ['brotha', 'brotha']),
        DummyObject('desmond', ['brotha', 'sista'])]

  def Callable(self):
    """Always raises, to show that the method was called by mistake.

    Raises:
      RuntimeError: whenever the method is called.
    """
    raise RuntimeError(u'This can not be called.')

  @property
  def name(self):
    """The file name."""
    return self._FILENAME

  @property
  def size(self):
    """An integer value."""
    return 10

  @property
  def float(self):
    """A floating point value."""
    return 123.9823

  @property
  def attributes(self):
    """A list with the two file attribute names."""
    return [self.ATTR1, self.ATTR2]

  @property
  def hash(self):
    """A list with one HashObject per hash constant."""
    return [HashObject(self.HASH1), HashObject(self.HASH2)]

  @property
  def novalues(self):
    """An empty list."""
    return []

  @property
  def imported_dlls(self):
    """A list with the two Dll objects created by the constructor."""
    return [self.imported_dll1, self.imported_dll2]

  @property
  def deferred_values(self):
    """Generator that yields 'a' and then 'b'."""
    for value in ('a', 'b'):
      yield value
class ObjectFilterTest(unittest.TestCase):
def setUp(self):
  """Creates the test file and selects the lowercase filter implementation."""
  implementation = objectfilter.LowercaseAttributeFilterImplementation

  self.file = DummyFile()
  self.filter_imp = implementation
  self.value_expander = implementation.FILTERS['ValueExpander']
# Test table used by testBinaryOperators. Every operator class maps to a list
# of (expected result, [attribute path, right operand]) tuples; the operator
# is built from the second item and matched against the DummyFile instance.
operator_tests = {
    objectfilter.Less: [
        (True, ['size', 1000]),
        (True, ['size', 11]),
        (False, ['size', 10]),
        (False, ['size', 0]),
        (False, ['float', 1.0]),
        (True, ['float', 123.9824])],
    objectfilter.LessEqual: [
        (True, ['size', 1000]),
        (True, ['size', 11]),
        (True, ['size', 10]),
        (False, ['size', 9]),
        (False, ['float', 1.0]),
        (True, ['float', 123.9823])],
    objectfilter.Greater: [
        (True, ['size', 1]),
        (True, ['size', 9.23]),
        (False, ['size', 10]),
        (False, ['size', 1000]),
        (True, ['float', 122]),
        (True, ['float', 1.0])],
    objectfilter.GreaterEqual: [
        (False, ['size', 1000]),
        (False, ['size', 11]),
        (True, ['size', 10]),
        (True, ['size', 0]),
        # Floats work fine too.
        (True, ['float', 122]),
        (True, ['float', 123.9823]),
        # Comparisons work with strings, although it might be a bit silly.
        (True, ['name', 'aoot.ini'])],
    objectfilter.Contains: [
        # Contains works with strings.
        (True, ['name', 'boot.ini']),
        (True, ['name', 'boot']),
        (False, ['name', 'meh']),
        # Works with generators.
        (True, ['imported_dlls.imported_functions', 'FindWindow']),
        # But not with numbers.
        (False, ['size', 12])],
    objectfilter.Equals: [
        (True, ['name', 'boot.ini']),
        (False, ['name', 'foobar']),
        (True, ['float', 123.9823])],
    objectfilter.NotEquals: [
        (False, ['name', 'boot.ini']),
        (True, ['name', 'foobar']),
        (True, ['float', 25])],
    objectfilter.InSet: [
        (True, ['name', ['boot.ini', 'autoexec.bat']]),
        (True, ['name', 'boot.ini']),
        (False, ['name', 'NOPE']),
        # All values of attributes are within these.
        (True, ['attributes', ['Archive', 'Backup', 'Nonexisting']]),
        # Not all values of attributes are within these.
        (False, ['attributes', ['Executable', 'Sparse']])],
    objectfilter.Regexp: [
        (True, ['name', '^boot.ini$']),
        (True, ['name', 'boot.ini']),
        (False, ['name', '^$']),
        (True, ['attributes', 'Archive']),
        # Numbers can be matched with a regular expression as well.
        (True, ['size', 0]),
        # But regexp doesn't work with lists or generators for the moment.
        (False, ['imported_dlls.imported_functions', 'FindWindow'])],
}
def testBinaryOperators(self):
  """Tests every operator in operator_tests against the test file.

  Each operator is built from the arguments of a test unit and must match
  the test file as expected. Operators that provide FlipBool are flipped
  afterwards and must then produce the opposite result.
  """
  for operator, test_data in self.operator_tests.items():
    for expected, arguments in test_data:
      # The description replaces the former debug print statements, so the
      # failing operator and arguments are reported without noisy output.
      description = u'{0!s} with arguments {1!s}, expected {2!s}'.format(
          operator, arguments, expected)

      ops = operator(arguments=arguments, value_expander=self.value_expander)
      self.assertEqual(expected, ops.Matches(self.file), description)

      if hasattr(ops, 'FlipBool'):
        ops.FlipBool()
        self.assertEqual(
            not expected, ops.Matches(self.file),
            u'Negative matching of {0:s}'.format(description))
def testExpand(self):
  """Tests the expansion of attribute paths by the value expander."""

  def Expand(path):
    """Expands path on the test file using a new value expander."""
    return self.value_expander().Expand(self.file, path)

  # Case insensitivity.
  values_lowercase = Expand('size')
  values_uppercase = Expand('Size')
  self.assertListEqual(list(values_lowercase), list(values_uppercase))

  # Existing, non-repeated, leaf is a value.
  self.assertListEqual(list(Expand('size')), [10])

  # Existing, non-repeated, leaf is iterable.
  self.assertListEqual(
      list(Expand('attributes')), [[DummyFile.ATTR1, DummyFile.ATTR2]])

  # Existing, repeated, leaf is value.
  self.assertListEqual(
      list(Expand('hash.md5')), [DummyFile.HASH1, DummyFile.HASH2])

  # Existing, repeated, leaf is iterable.
  self.assertListEqual(
      list(Expand('non_callable_repeated.desmond')),
      [['brotha', 'brotha'], ['brotha', 'sista']])

  # Now with an iterator.
  expanded = [list(value) for value in Expand('deferred_values')]
  self.assertListEqual(expanded, [['a', 'b']])

  # Iterator > generator.
  expected = [['FindWindow', 'CreateFileA'], ['RegQueryValueEx']]
  expanded = [
      list(value) for value in Expand('imported_dlls.imported_functions')]
  self.assertListEqual(expanded, expected)

  # Non-existing first path.
  self.assertListEqual(list(Expand('nonexistant')), [])

  # Non-existing in the middle.
  self.assertListEqual(list(Expand('hash.mink.boo')), [])

  # Non-existing as a leaf.
  self.assertListEqual(list(Expand('hash.mink')), [])

  # Non-callable leaf.
  self.assertListEqual(
      list(Expand('non_callable_leaf')), [DummyFile.non_callable_leaf])

  # A callable expands to nothing.
  self.assertListEqual(list(Expand('Callable')), [])

  # A leaf under a callable expands to nothing either.
  self.assertListEqual(list(Expand('Callable.a')), [])
def testGenericBinaryOperator(self):
  """Tests a binary operator that records every left operand it receives."""

  class TestBinaryOperator(objectfilter.GenericBinaryOperator):
    """Binary operator that collects the values passed to Operation."""

    # Shared on the class; filled by Operation.
    values = []

    def Operation(self, x, _):
      """Records x and returns the result of list.append, which is None."""
      return self.values.append(x)

  # Test a common binary operator.
  test_operator = TestBinaryOperator(
      arguments=['whatever', 0], value_expander=self.value_expander)
  self.assertEqual(test_operator.right_operand, 0)
  self.assertEqual(test_operator.args[0], 'whatever')

  attribute_values = ['id', 'id2', 'bg', 'bg2']
  for attribute_value in attribute_values:
    test_operator.Matches(DummyObject('whatever', attribute_value))

  self.assertListEqual(test_operator.values, attribute_values)
def testContext(self):
  """Tests the Context filter built by hand and parsed from a query."""
  expander = self.value_expander

  def MakeEquals(path, value):
    """Builds an Equals filter for the attribute path and value."""
    return objectfilter.Equals(
        arguments=[path, value], value_expander=expander)

  def MakeContains(path, value):
    """Builds a Contains filter for the attribute path and value."""
    return objectfilter.Contains(
        arguments=[path, value], value_expander=expander)

  # A context with only one operand is rejected.
  self.assertRaises(
      objectfilter.InvalidNumberOfOperands, objectfilter.Context,
      arguments=['context'], value_expander=expander)

  # A context with three operands is rejected as well.
  self.assertRaises(
      objectfilter.InvalidNumberOfOperands, objectfilter.Context,
      arguments=[
          'context',
          MakeEquals('path', 'value'),
          MakeEquals('another_path', 'value')],
      value_expander=expander)

  # One imported_dll imports 1 function AND one imported_dll imports
  # function RegQueryValueEx.
  condition = objectfilter.AndFilter(arguments=[
      MakeEquals('imported_dlls.num_imported_functions', 1),
      MakeContains('imported_dlls.imported_functions', 'RegQueryValueEx')])
  # Without context, it matches because both filters match separately.
  self.assertEqual(True, condition.Matches(self.file))

  # The same DLL imports 2 functions AND one of these is RegQueryValueEx.
  condition = objectfilter.AndFilter(arguments=[
      MakeEquals('num_imported_functions', 2),
      MakeContains('imported_functions', 'RegQueryValueEx')])
  context = objectfilter.Context(
      arguments=['imported_dlls', condition], value_expander=expander)
  # With context, it doesn't match because both don't match in the same dll.
  self.assertEqual(False, context.Matches(self.file))

  # The same DLL imports 1 function AND it's RegQueryValueEx.
  condition = objectfilter.AndFilter(arguments=[
      MakeEquals('num_imported_functions', 1),
      MakeContains('imported_functions', 'RegQueryValueEx')])
  context = objectfilter.Context(
      ['imported_dlls', condition], value_expander=expander)
  self.assertEqual(True, context.Matches(self.file))

  # Now test the context with a straight query.
  query_lines = [
      '@imported_dlls',
      '(',
      ' imported_functions contains "RegQueryValueEx"',
      ' AND num_imported_functions == 1',
      ')']
  parsed_query = objectfilter.Parser(u'\n'.join(query_lines)).Parse()
  filter_ = parsed_query.Compile(self.filter_imp)
  self.assertEqual(True, filter_.Matches(self.file))
def testRegexpRaises(self):
  """Tests that Regexp raises ValueError for a pattern that fails to compile."""
  self.assertRaises(
      ValueError, objectfilter.Regexp,
      arguments=['name', 'I [dont compile'],
      value_expander=self.value_expander)
def testEscaping(self):
  """Tests the handling of escape sequences in quoted string arguments."""

  def ParsedArgument(query):
    """Parses query and returns the first argument of the result."""
    return objectfilter.Parser(query).Parse().args[0]

  def AssertParseError(query):
    """Checks that parsing query raises ParseError."""
    parser = objectfilter.Parser(query)
    self.assertRaises(objectfilter.ParseError, parser.Parse)

  self.assertEqual(ParsedArgument(r'a is "\n"'), '\n')

  # Invalid escape sequence.
  AssertParseError(r'a is "\z"')

  # Can escape the backslash.
  self.assertEqual(ParsedArgument(r'a is "\\"'), '\\')

  # Test hexadecimal escaping.
  # This fails as it's not really a hex escaped string.
  AssertParseError(r'a is "\xJZ"')

  # Instead, this is what one should write.
  self.assertEqual(ParsedArgument(r'a is "\\xJZ"'), r'\xJZ')

  # Standard hex-escape.
  self.assertEqual(ParsedArgument(r'a is "\x41\x41\x41"'), 'AAA')

  # Hex-escape + a character.
  self.assertEqual(ParsedArgument(r'a is "\x414"'), r'A4')

  # How to include r'\x41'.
  self.assertEqual(ParsedArgument(r'a is "\\x41"'), r'\x41')
def testParse(self):
  """Tests which query strings the parser accepts and which it rejects."""

  def Parse(query):
    """Parses query; any ParseError makes the test fail."""
    return objectfilter.Parser(query).Parse()

  def AssertParseError(query):
    """Checks that parsing query raises ParseError."""
    parser = objectfilter.Parser(query)
    self.assertRaises(objectfilter.ParseError, parser.Parse)

  # Arguments are either int, float or quoted string.
  Parse('attribute == 1')
  Parse('attribute == 0x10')
  AssertParseError('attribute == 1a')
  Parse('attribute == 1.2')
  Parse('attribute == \'bla\'')
  Parse('attribute == "bla"')
  AssertParseError('something == red')

  # Can't start with AND.
  AssertParseError('and something is \'Blue\'')

  # Test negative filters.
  AssertParseError('attribute not == \'dancer\'')
  AssertParseError('attribute == not \'dancer\'')
  AssertParseError('attribute not not equals \'dancer\'')
  AssertParseError('attribute not > 23')

  # Need to close braces.
  Parse('(a is 3)')
  AssertParseError('(a is 3')

  # Need to open braces to close them.
  AssertParseError('a is 3)')

  # Context Operator alone is not accepted.
  AssertParseError('@attributes')

  # Accepted only with braces.
  Parse('@attributes( name is \'adrien\')')

  # Not without them.
  AssertParseError('@attributes name is \'adrien\'')

  # Can nest context operators.
  Parse('@imported_dlls( @imported_function( name is \'OpenFileA\'))')

  # Can nest context operators and mix braces without it messing up.
  Parse('@imported_dlls( @imported_function( name is \'OpenFileA\'))')
  Parse(u'\n'.join([
      '@imported_dlls',
      '(',
      ' @imported_function',
      ' (',
      ' name is "OpenFileA" and ordinal == 12',
      ' )',
      ')']))

  # Mix context and binary operators.
  Parse(u'\n'.join([
      '@imported_dlls',
      '(',
      ' @imported_function',
      ' (',
      ' name is "OpenFileA"',
      ' ) AND num_functions == 2',
      ')']))

  # Also on the right.
  # NOTE(review): this query is only built and never parsed, so it is not
  # actually tested - confirm whether a Parse() call is missing.
  query = u'\n'.join([
      '@imported_dlls',
      '(',
      ' num_functions == 2 AND',
      ' @imported_function',
      ' (',
      ' name is "OpenFileA"',
      ' )',
      ')'])

  # Altogether.
  # There's an imported dll that imports OpenFileA AND
  # an imported DLL matching advapi32.dll that imports RegQueryValueExA AND
  # and it exports a symbol called 'inject'.
  # NOTE(review): this query is only built and never parsed either.
  query = u'\n'.join([
      '@imported_dlls( @imported_function ( name is "OpenFileA" ) )',
      'AND',
      '@imported_dlls (',
      ' name regexp "(?i)advapi32.dll"',
      ' AND @imported_function ( name is "RegQueryValueEx" )',
      ')',
      'AND @exported_symbols(name is "inject")'])
def testCompile(self):
  """Tests compiling parsed queries and matching them against objects."""

  def Matches(query, target):
    """Parses and compiles query and returns whether it matches target."""
    parser = objectfilter.Parser(query).Parse()
    filter_ = parser.Compile(self.filter_imp)
    return filter_.Matches(target)

  obj = DummyObject('something', 'Blue')
  self.assertEqual(Matches('something == \'Blue\'', obj), True)
  self.assertEqual(Matches('something == \'Red\'', obj), False)
  self.assertEqual(Matches('something == "Red"', obj), False)

  obj = DummyObject('size', 4)
  self.assertEqual(Matches('size < 3', obj), False)
  self.assertEqual(Matches('size == 4', obj), True)
  self.assertEqual(
      Matches('something is \'Blue\' and size not contains 3', obj), False)
# Run the tests of this file when it is executed as a script.
if __name__ == '__main__':
  unittest.main()
+394
View File
@@ -0,0 +1,394 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the interface for output parsing of plaso.
The default output or storage mechanism of Plaso is not in a human
readable format, so there needs to be a way to render the stored data
in a human readable form.
After the timeline is collected and stored another tool can read, filter,
sort and process the output inside the storage, and send each processed
entry to an output formatter that takes care of parsing the output into
a human readable format for easy human consumption/analysis.
"""
import abc
import logging
import sys
from plaso.lib import errors
from plaso.lib import registry
from plaso.lib import utils
import pytz
class LogOutputFormatter(object):
  """A base class for formatting output produced by plaso.

  This class exists mostly for documentation purposes. Subclasses should
  override the relevant methods to act on the callbacks.
  """

  __metaclass__ = registry.MetaclassRegistry
  __abstract = True

  # Optional arguments to be added to the argument parser.
  # An example would be:
  #   ARGUMENTS = [('--myparameter', {
  #       'action': 'store',
  #       'help': 'This is my parameter help',
  #       'dest': 'myparameter',
  #       'default': '',
  #       'type': 'unicode'})]
  #
  # Where all arguments into the dict object have a direct translation
  # into the argparse parser.
  ARGUMENTS = []

  def __init__(self, store, filehandle=sys.stdout, config=None,
               filter_use=None):
    """Constructor for the output module.

    The timezone is read from the "timezone" attribute of the configuration
    object and the encoding from its "preferred_encoding" attribute, with
    UTC and utf-8 used when the attributes are missing.

    Args:
      store: A StorageFile object that defines the storage.
      filehandle: A file-like object that can be written to.
      config: The configuration object, containing config information.
      filter_use: A filter_interface.FilterObject object.
    """
    zone = getattr(config, 'timezone', 'UTC')
    try:
      self.zone = pytz.timezone(zone)
    except pytz.UnknownTimeZoneError:
      # An unknown timezone is not fatal, UTC is used in its place.
      logging.warning(u'Unknown timezone: {0:s} defaulting to: UTC'.format(
          zone))
      self.zone = pytz.utc

    self.filehandle = filehandle
    self.store = store
    self._filter = filter_use
    self._config = config
    self.encoding = getattr(config, 'preferred_encoding', 'utf-8')

  # TODO: this function seems to be only called with the default arguments,
  # so refactor this function away.
  def FetchEntry(self, store_number=-1, store_index=-1):
    """Fetches an entry from the storage.

    Fetches the next entry in the storage file, except if location
    is explicitly indicated.

    Args:
      store_number: The store number if explicit location is to be read.
                    Only values larger than zero select an explicit location.
      store_index: The index into the store, if explicit location is to be
                   read.

    Returns:
      An EventObject, either the next one or from a specific location.
    """
    if store_number > 0:
      return self.store.GetEventObject(store_number, store_index)
    else:
      return self.store.GetSortedEntry()

  def WriteEvent(self, evt):
    """Write the output of a single entry to the output filehandle.

    This method takes care of actually outputting each event in
    question. It does so by first prepending it with potential
    start of event, then processes the main body before appending
    a potential end of event.

    Args:
      evt: An EventObject, defined in the event library.
    """
    self.StartEvent()
    self.EventBody(evt)
    self.EndEvent()

  @abc.abstractmethod
  def EventBody(self, evt):
    """Writes the main body of an event to the output filehandle.

    Subclasses are expected to override this method, the implementation in
    this class does nothing.

    Args:
      evt: An EventObject, defined in the event library.
    """

  def StartEvent(self):
    """This should be extended by specific implementations.

    This method does all preprocessing or output before each event
    is printed, for instance to surround XML events with tags, etc.
    """
    pass

  def EndEvent(self):
    """This should be extended by specific implementations.

    This method does all the post-processing or output after
    each event has been printed, such as closing XML tags, etc.
    """
    pass

  def Start(self):
    """This should be extended by specific implementations.

    Depending on the file format of the output it may need
    a header. Implementations that need one produce it here, the
    implementation in this class does nothing.
    """
    pass

  def End(self):
    """This should be extended by specific implementations.

    Depending on the file format of the output it may need
    a footer. Implementations that need one produce it here, the
    implementation in this class does nothing.
    """
    pass
# Need to suppress this since these classes do not implement the
# abstract method EventBody, classes that inherit from one of these
# classes need to implement that function.
# pylint: disable=abstract-method
class FileLogOutputFormatter(LogOutputFormatter):
  """An output formatter that writes to a file or a file-like object."""

  __abstract = True

  def __init__(self, store, filehandle=sys.stdout, config=None,
               filter_use=None):
    """Sets up the formatter and wraps the output in an OutputFilehandle.

    Args:
      store: A StorageFile object that defines the storage.
      filehandle: Either a string with the path of a file, which is opened
                  for binary writing, or an object that has a write method.
      config: The configuration object, containing config information.
      filter_use: A filter_interface.FilterObject object.

    Raises:
      IOError: if filehandle is neither a string nor has a write method.
    """
    super(FileLogOutputFormatter, self).__init__(
        store, filehandle, config, filter_use)

    is_path = isinstance(filehandle, basestring)
    if not is_path and not hasattr(filehandle, 'write'):
      raise IOError(
          u'Unable to determine how to use filehandle passed in: {}'.format(
              type(filehandle)))

    target = open(filehandle, 'wb') if is_path else filehandle

    # Replaces the plain filehandle stored by the parent constructor.
    self.filehandle = OutputFilehandle(self.encoding)
    self.filehandle.Open(target)

  def End(self):
    """Runs the parent End and then closes the wrapped filehandle."""
    super(FileLogOutputFormatter, self).End()
    self.filehandle.Close()
class EventBuffer(object):
  """Buffer class for EventObject output processing."""

  # Attributes whose values are combined when two duplicate events are joined.
  MERGE_ATTRIBUTES = ['inode', 'filename', 'display_name']

  def __init__(self, formatter, check_dedups=True):
    """Initialize the EventBuffer.

    This class is used for buffering up events for duplicate removals
    and for other post-processing/analysis of events before being presented
    by the appropriate output module.

    The Start method of the formatter is called as part of the
    initialization.

    Args:
      formatter: An OutputFormatter object.
      check_dedups: Boolean value indicating whether or not the buffer should
                    check and merge duplicate entries or not.
    """
    self._buffer_dict = {}
    self._current_timestamp = 0
    self.duplicate_counter = 0
    self.check_dedups = check_dedups

    self.formatter = formatter
    self.formatter.Start()

  def Append(self, event_object):
    """Append an EventObject into the processing pipeline.

    Without duplicate checking the event is written right away. Otherwise
    events are buffered per timestamp: a new timestamp flushes the buffer,
    and an event with the same equality string as a buffered one is joined
    with that buffered event.

    Args:
      event_object: The EventObject that is being added.
    """
    if not self.check_dedups:
      self.formatter.WriteEvent(event_object)
      return

    if event_object.timestamp != self._current_timestamp:
      self._current_timestamp = event_object.timestamp
      self.Flush()

    key = event_object.EqualityString()
    if key in self._buffer_dict:
      self.JoinEvents(event_object, self._buffer_dict.pop(key))
    self._buffer_dict[key] = event_object

  def Flush(self):
    """Flushes the buffer by sending records to a formatter and prints."""
    if not self._buffer_dict:
      return

    for event_object in self._buffer_dict.values():
      try:
        self.formatter.WriteEvent(event_object)
      except errors.WrongFormatter as exception:
        # One event that cannot be written must not stop the others.
        logging.error(u'Unable to write event: {0!s}'.format(exception))

    self._buffer_dict = {}

  def JoinEvents(self, event_a, event_b):
    """Join this EventObject with another one.

    The merged values are stored on event_a, event_b is not changed.

    Args:
      event_a: The EventObject that receives the merged values.
      event_b: The duplicate EventObject whose values are merged in.
    """
    self.duplicate_counter += 1
    # TODO: Currently we are using the first event pathspec, perhaps that
    # is not the best approach. There is no need to have all the pathspecs
    # inside the combined event, however which one should be chosen is
    # perhaps something that can be evaluated here (regular TSK in favor of
    # an event stored deep inside a VSS for instance).
    for attr in self.MERGE_ATTRIBUTES:
      val_a = set(
          utils.GetUnicodeString(getattr(event_a, attr, '')).split(';'))
      val_b = set(
          utils.GetUnicodeString(getattr(event_b, attr, '')).split(';'))
      # Keeping this consistent across runs helps with diffs.
      values_list = sorted(val_a | val_b)
      setattr(event_a, attr, u';'.join(values_list))

    # Special instance if this is a filestat entry we need to combine the
    # description field.
    if getattr(event_a, 'parser', u'') == 'filestat':
      description_a = set(getattr(event_a, 'timestamp_desc', u'').split(';'))
      description_b = set(getattr(event_b, 'timestamp_desc', u'').split(';'))
      # A missing or empty description must not end up in the merged value.
      description_a.discard(u'')
      description_b.discard(u'')

      # Whole descriptions are compared, not substrings, so that a
      # description is not lost just because it is part of a longer one.
      if not description_b.issubset(description_a):
        descriptions = sorted(description_a | description_b)
        setattr(event_a, 'timestamp_desc', u';'.join(descriptions))

  def End(self):
    """Flushes the buffer and calls the formatter to produce the closing line."""
    self.Flush()

    if self.formatter:
      self.formatter.End()

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.End()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self
class OutputFilehandle(object):
  """A thin filehandle wrapper that takes care of character encoding.

  All data is stored as unicode text internally. However there are some
  issues with clients that try to output unicode text to a non-unicode
  terminal. This wrapper therefore distinguishes between writing to a file,
  where the default unicode encoding is used, and writing to the terminal,
  where the text is encoded with the encoding of that terminal (if possible).
  """

  DEFAULT_ENCODING = 'utf-8'

  def __init__(self, encoding='utf-8'):
    """Initializes the output file handler.

    Args:
      encoding: The default terminal encoding, only used if attempted to write
                to the terminal.
    """
    self._encoding = encoding
    self._filehandle = None
    # An attribute stating whether or not this is STDOUT.
    self._standard_out = False

  def Open(self, filehandle=sys.stdout, path=''):
    """Opens a filehandle to an output file.

    Args:
      filehandle: A file-like-object that is used to write data to.
      path: If a file like object is not passed in it is possible
            to pass in a path to a file, and a file-like-object will be
            created.
    """
    self._filehandle = open(path, 'wb') if path else filehandle

    # A handle without a name, or one named like standard output, is treated
    # as the terminal.
    if not hasattr(self._filehandle, 'name') or (
        self._filehandle.name.startswith('<stdout>')):
      self._standard_out = True

  def WriteLine(self, line):
    """Writes a single line to the supplied filehandle.

    Nothing is written when no filehandle has been opened.

    Args:
      line: The unicode text to write.
    """
    if not self._filehandle:
      return

    if not self._standard_out:
      # Write to a file, use unicode.
      self._filehandle.write(line.encode(self.DEFAULT_ENCODING))
      return

    # Write using preferred user encoding.
    try:
      self._filehandle.write(line.encode(self._encoding))
    except UnicodeEncodeError:
      logging.error(
          u'Unable to properly write logline, save output to a file to '
          u'prevent missing data.')
      # Write what can be encoded, the other characters are dropped.
      self._filehandle.write(line.encode(self._encoding, 'ignore'))

  def Close(self):
    """Closes the filehandle, unless it is treated as standard output."""
    if self._standard_out or not self._filehandle:
      return
    self._filehandle.close()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.Close()
def GetOutputFormatter(output_string):
  """Return an output formatter that matches the provided string.

  The lookup is case insensitive: the first character of the string is
  converted to upper case and the remainder to lower case before the name is
  looked up in LogOutputFormatter.classes.

  Args:
    output_string: A string with the name of the output formatter.

  Returns:
    The matching entry of LogOutputFormatter.classes, or None if the value
    is not a string, is an empty string or does not match any entry.
  """
  # An empty string has no first character to convert, and no formatter can
  # match it.
  if not output_string or not isinstance(output_string, basestring):
    return None

  # Format the output string (make the input case in-sensitive).
  format_str = u''.join(
      [output_string[0].upper(), output_string[1:].lower()])
  return LogOutputFormatter.classes.get(format_str, None)
def ListOutputFormatters():
  """Generate a list of all available output formatters.

  The docstring is read from the class itself, so no formatter is
  instantiated just to be listed.

  Yields:
    A tuple of the name of the formatter in LogOutputFormatter.classes and
    the first line of its docstring, which is an empty string for a class
    without a docstring.
  """
  for cl in LogOutputFormatter.classes:
    formatter_class = LogOutputFormatter.classes[cl]
    doc_string, _, _ = (formatter_class.__doc__ or u'').partition('\n')
    yield cl, doc_string
+193
View File
@@ -0,0 +1,193 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains tests for the output formatter."""
import os
import locale
import sys
import tempfile
import unittest
from plaso.lib import output
class DummyEvent(object):
  """Simple class that defines a dummy event."""

  def __init__(self, timestamp, entry):
    """Stores the timestamp, the entry and a fixed date.

    Args:
      timestamp: The timestamp, converted with int(). A value for which the
                 conversion raises ValueError results in a timestamp of 0.
      entry: The text of the event.
    """
    try:
      converted_timestamp = int(timestamp)
    except ValueError:
      converted_timestamp = 0

    self.date = u'03/01/2012'
    self.timestamp = converted_timestamp
    self.entry = entry

  def EqualityString(self):
    """Returns the timestamp and the entry joined by a semicolon."""
    parts = (self.timestamp, self.entry)
    return u';'.join(str(part) for part in parts)
class TestOutput(output.LogOutputFormatter):
  """This is a test output module that provides a simple XML."""

  def __init__(self, filehandle):
    """Fake the store."""
    super(TestOutput, self).__init__(store=None, filehandle=filehandle)

  def FetchEntry(self, **_):
    """Does nothing, this test module has no store to fetch from."""
    pass

  def Start(self):
    """Writes the opening tag of the event file."""
    self.filehandle.write(u'<EventFile>\n')

  def End(self):
    """Writes the closing tag of the event file."""
    self.filehandle.write(u'</EventFile>\n')

  def StartEvent(self):
    """Writes the opening tag of an event."""
    self.filehandle.write(u'<Event>\n')

  def EndEvent(self):
    """Writes the closing tag of an event."""
    self.filehandle.write(u'</Event>\n')

  def EventBody(self, event_object):
    """Writes the date, the timestamp and the entry of an event.

    Args:
      event_object: The event to write, its date, timestamp and entry
                    attributes are used.
    """
    template = (
        u'\t<Date>{0:s}</Date>\n\t<Time>{1:d}</Time>\n'
        u'\t<Entry>{2:s}</Entry>\n')
    body = template.format(
        event_object.date, event_object.timestamp, event_object.entry)
    self.filehandle.write(body)
class PlasoOutputUnitTest(unittest.TestCase):
  """The unit test for plaso output formatting."""

  def testOutput(self):
    """Test a test implementation of the output formatter."""
    events = [DummyEvent(123456, u'My Event Is Now!'),
              DummyEvent(123458, u'There is no tomorrow.'),
              DummyEvent(123462, u'Tomorrow is now.'),
              DummyEvent(123489, u'This is just some stuff to fill the line.')]

    lines = []
    with tempfile.NamedTemporaryFile() as fh:
      formatter = TestOutput(fh)
      formatter.Start()
      for event_object in events:
        formatter.WriteEvent(event_object)
      formatter.End()

      # Read back what the formatter has written.
      fh.seek(0)
      for line in fh:
        lines.append(line)

    # One header line, five lines for each of the four events and one footer
    # line.
    self.assertEqual(len(lines), 22)
    self.assertEqual(lines[0], u'<EventFile>\n')
    self.assertEqual(lines[1], u'<Event>\n')
    self.assertEqual(lines[2], u'\t<Date>03/01/2012</Date>\n')
    self.assertEqual(lines[3], u'\t<Time>123456</Time>\n')
    self.assertEqual(lines[4], u'\t<Entry>My Event Is Now!</Entry>\n')
    self.assertEqual(lines[5], u'</Event>\n')
    self.assertEqual(lines[6], u'<Event>\n')
    self.assertEqual(lines[7], u'\t<Date>03/01/2012</Date>\n')
    self.assertEqual(lines[8], u'\t<Time>123458</Time>\n')
    self.assertEqual(lines[9], u'\t<Entry>There is no tomorrow.</Entry>\n')
    self.assertEqual(lines[10], u'</Event>\n')
    self.assertEqual(lines[11], u'<Event>\n')
    self.assertEqual(lines[-1], u'</EventFile>\n')

  def testOutputList(self):
    """Test listing up all available registered modules."""
    module_seen = False
    for name, description in output.ListOutputFormatters():
      if 'TestOutput' in name:
        module_seen = True
        # The description is the first line of the TestOutput docstring.
        self.assertEqual(description, (
            u'This is a test output module that provides a simple XML.'))

    self.assertTrue(module_seen)
class EventBufferTest(unittest.TestCase):
  """Few unit tests for the EventBuffer class."""

  def testFlush(self):
    """Test to ensure we empty our buffers and sends to output properly."""
    with tempfile.NamedTemporaryFile() as fh:

      def CheckBufferLength(event_buffer, expected):
        """Checks the number of events that are held in the buffer.

        Args:
          event_buffer: The EventBuffer to check.
          expected: The expected number of buffered events, replaced by zero
                    when the buffer does not check for duplicates.
        """
        if not event_buffer.check_dedups:
          expected = 0
        # pylint: disable=protected-access
        self.assertEqual(len(event_buffer._buffer_dict), expected)

      formatter = TestOutput(fh)
      # NOTE(review): the buffer is created with check_dedups set to False,
      # so CheckBufferLength always compares against zero - confirm whether
      # duplicate checking was meant to be enabled here.
      event_buffer = output.EventBuffer(formatter, False)

      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      CheckBufferLength(event_buffer, 1)

      # Add three events.
      event_buffer.Append(DummyEvent(123456, u'OMG I AM DIFFERENT'))
      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      CheckBufferLength(event_buffer, 2)

      event_buffer.Flush()
      CheckBufferLength(event_buffer, 0)

      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      event_buffer.Append(DummyEvent(123456, u'Different again :)'))
      CheckBufferLength(event_buffer, 2)

      event_buffer.Append(DummyEvent(123457, u'Now is different'))
      CheckBufferLength(event_buffer, 1)
class OutputFilehandleTest(unittest.TestCase):
  """Few unit tests for the OutputFilehandle."""

  def setUp(self):
    """Determines the preferred encoding of the current locale."""
    self.preferred_encoding = locale.getpreferredencoding()

  def _GetLine(self):
    """Returns a unicode test line that contains non-ASCII characters."""
    # Time, Þorri allra landsmanna hlýddu á atburðinn.
    return ('Time, \xc3\x9eorri allra landsmanna hl\xc3\xbdddu \xc3\xa1 '
            'atbur\xc3\xb0inn.\n').decode('utf-8')

  def testFilePath(self):
    """Tests writing a line to a file that is opened by its path."""
    temp_path = ''
    # The temporary file is only used to obtain a unique path, it is removed
    # again when the with statement ends.
    with tempfile.NamedTemporaryFile(delete=True) as temp_file:
      temp_path = temp_file.name

    line_read = u''
    try:
      with output.OutputFilehandle(self.preferred_encoding) as fh:
        fh.Open(path=temp_path)
        fh.WriteLine(self._GetLine())

      with open(temp_path, 'rb') as output_file:
        line_read = output_file.read()
    finally:
      # Remove the output file even when writing or reading has failed.
      if os.path.exists(temp_path):
        os.remove(temp_path)

    self.assertEqual(line_read, self._GetLine().encode('utf-8'))

  def testStdOut(self):
    """Tests that writing a line to standard output raises no encoding error."""
    with output.OutputFilehandle(self.preferred_encoding) as fh:
      fh.Open(sys.stdout)
      try:
        fh.WriteLine(self._GetLine())
      except (UnicodeEncodeError, UnicodeDecodeError) as exception:
        self.fail(
            u'Unable to write line to stdout: {0!s}'.format(exception))
# Run the unit tests when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()
+455
View File
@@ -0,0 +1,455 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An extension of the objectfilter to provide plaso specific options."""
import datetime
import logging
from plaso.formatters import manager as formatters_manager
# TODO: Change this so it becomes an attribute instead of having the backend
# load a front-end library.
from plaso.frontend import presets
from plaso.lib import limit
from plaso.lib import objectfilter
from plaso.lib import timelib
from plaso.lib import utils
class DictObject(object):
  """A simple object representing a dict object.

  To filter against an object that is stored as a dictionary the dict
  is converted into a simple object. Since keys can contain spaces
  and/or other symbols they are stripped out to make filtering work
  like it is another object.

  Example dict:
    {'A value': 234,
     'this (my) key_': 'value',
     'random': True,
    }

  This object would then allow access to object.thismykey that would access
  the key 'this (my) key_' inside the dict.
  """

  def __init__(self, dict_object):
    """Initialize the object and build a secondary dict.

    Args:
      dict_object: the dictionary to wrap. A second dictionary, keyed on the
                   stripped version of every key, is built from it.
    """
    # TODO: Move some of this code to a more value typed system.
    self._dict_object = dict_object
    self._dict_translated = {}
    for key, value in dict_object.items():
      self._dict_translated[self._StripKey(key)] = value

  def _StripKey(self, key):
    """Return a stripped version of the dict key without symbols.

    Args:
      key: the dictionary key; any object that can be converted to a string.

    Returns:
      The lower case string form of the key with the characters
      ' (){}+_=-<>[]' removed.
    """
    try:
      key_string = str(key)
    except UnicodeEncodeError:
      # A Unicode key with non-ASCII characters cannot be converted into a
      # byte string; keep the Unicode string instead of dropping the key, so
      # that distinct keys do not all collapse into a single None entry.
      key_string = key

    # Filtering character by character works for byte and Unicode strings
    # alike, which the two argument form of str.translate() does not.
    return ''.join(
        character for character in key_string.lower()
        if character not in ' (){}+_=-<>[]')

  def __getattr__(self, attr):
    """Return back entries from the dictionary.

    Args:
      attr: the name of the requested attribute.

    Returns:
      The value stored under the exact key, or under the key whose stripped
      form matches the stripped attribute name. The special name '__all__'
      returns a single string of space separated 'key:value' pairs. None is
      returned if nothing matches.

    Raises:
      AttributeError: if one of the internal dictionaries is requested before
                      it has been set.
    """
    # __getattr__ is only invoked when regular lookup fails, so reaching this
    # point for an internal name means the instance is not initialized (e.g.
    # while being copied or unpickled). Bail out instead of recursing.
    if attr in ('_dict_object', '_dict_translated'):
      raise AttributeError(attr)

    if attr in self._dict_object:
      return self._dict_object.get(attr)

    # Special case of getting all the key/value pairs.
    if attr == '__all__':
      return u' '.join(
          u'{}:{}'.format(key, value)
          for key, value in self._dict_translated.items())

    test = self._StripKey(attr)
    if test in self._dict_translated:
      return self._dict_translated.get(test)

    # Unknown keys deliberately evaluate to None instead of raising.
    return None
class PlasoValueExpander(objectfilter.AttributeValueExpander):
  """An expander that gives values based on object attribute names."""

  def __init__(self):
    """Initialize an attribute value expander."""
    super(PlasoValueExpander, self).__init__()
    self._formatters_manager = formatters_manager.EventFormatterManager

  def _GetMessage(self, obj):
    """Return a properly formatted message string.

    Args:
      obj: the object to build the message string for.

    Returns:
      The first of the message strings returned by the formatters manager,
      or an empty string if the lookup raised a KeyError.
    """
    ret = u''

    try:
      ret, _ = self._formatters_manager.GetMessageStrings(obj)
    except KeyError as exception:
      logging.warning(u'Unable to correctly assemble event: {0:s}'.format(
          exception))

    return ret

  def _GetSources(self, obj):
    """Return a properly formatted source strings.

    Args:
      obj: the object to build the source strings for.

    Returns:
      A tuple of the short and the long source string. Both are empty
      strings if the lookup raised a KeyError.
    """
    # Defaults are needed since the return statement is also reached after a
    # KeyError, in which case neither name would otherwise be bound.
    source_short = u''
    source_long = u''

    try:
      source_short, source_long = self._formatters_manager.GetSourceStrings(obj)
    except KeyError as exception:
      logging.warning(u'Unable to correctly assemble event: {0:s}'.format(
          exception))

    return source_short, source_long

  def _GetValue(self, obj, attr_name):
    """Returns the value of an attribute, or a value derived for it.

    Args:
      obj: the object to read the value from.
      attr_name: the name of the attribute.

    Returns:
      The attribute value if it is set and evaluates to True; dictionaries
      are wrapped in a DictObject and for 'tag' the tags attribute of the
      value is returned. Otherwise the message or source string for the
      message and source aliases, or None.
    """
    ret = getattr(obj, attr_name, None)

    if ret:
      if isinstance(ret, dict):
        ret = DictObject(ret)

      if attr_name == 'tag':
        return ret.tags

      return ret

    # Check if this is a message request and we have a regular EventObject.
    if attr_name == 'message':
      return self._GetMessage(obj)

    # Check if this is a source_short request.
    if attr_name in ('source', 'source_short'):
      source_short, _ = self._GetSources(obj)
      return source_short

    # Check if this is a source_long request.
    if attr_name in ('source_long', 'sourcetype'):
      _, source_long = self._GetSources(obj)
      return source_long

    return None

  def _GetAttributeName(self, path):
    """Returns the lower case version of the first element of the path."""
    return path[0].lower()
class PlasoExpression(objectfilter.BasicExpression):
  """A Plaso specific expression."""

  # Alternative attribute names mapped onto the core attribute they refer to
  # (implementation specific).
  swap_source = {
      'date': 'timestamp',
      'datetime': 'timestamp',
      'time': 'timestamp',
      'description_long': 'message',
      'description': 'message',
      'description_short': 'message_short',
  }

  def Compile(self, filter_implementation):
    """Compiles the expression into an operator object.

    Args:
      filter_implementation: the filter implementation class that provides
                             the OPS and FILTERS lookup tables.

    Returns:
      The operator object created for this expression.

    Raises:
      ParseError: if the operator is not known to the filter implementation.
    """
    self.attribute = self.swap_source.get(self.attribute, self.attribute)

    operator = filter_implementation.OPS.get(self.operator.lower(), None)
    if not operator:
      raise objectfilter.ParseError(u'Unknown operator {0:s} provided.'.format(
          self.operator))

    # Plaso specific implementation - values compared against a timestamp
    # are wrapped so they can be given in a "human readable" format.
    if self.attribute == 'timestamp':
      self.args = [DateCompareObject(argument) for argument in self.args]

    # Record every date argument as a bound of the filter time range: "Less"
    # operators set the upper bound, all other operators the lower bound.
    if 'Less' in str(operator):
      set_bound = TimeRangeCache.SetUpperTimestamp
    else:
      set_bound = TimeRangeCache.SetLowerTimestamp

    for argument in self.args:
      if isinstance(argument, DateCompareObject):
        set_bound(argument.data)

    arguments = [self.attribute]
    arguments.extend(self.args)

    compiled = operator(
        arguments=arguments,
        value_expander=filter_implementation.FILTERS['ValueExpander'])

    # A negated expression flips the result of the operator, if supported.
    if not self.bool_value and hasattr(compiled, 'FlipBool'):
      compiled.FlipBool()

    return compiled
class ParserList(objectfilter.GenericBinaryOperator):
  """Matches when a parser is inside a predefined list of parsers."""

  def __init__(self, *children, **kwargs):
    """Builds the operator and looks up the list of parsers to match.

    The lower case right operand is used as the key into the preset
    categories; an unknown key results in an empty list.
    """
    super(ParserList, self).__init__(*children, **kwargs)
    category_name = self.right_operand.lower()
    self.compiled_list = presets.categories.get(category_name, [])

  def Operation(self, x, unused_y):
    """Determines if the parser is part of the parser list.

    Args:
      x: the parser name to look for.
      unused_y: the right hand value, which is not used.

    Returns:
      True if the parser is in the list, False otherwise.

    Raises:
      MalformedQueryError: if the left operand is not 'parser'.
    """
    if self.left_operand == 'parser':
      return x in self.compiled_list

    raise objectfilter.MalformedQueryError(
        u'Unable to use keyword "inlist" for other than parser.')
class PlasoAttributeFilterImplementation(objectfilter.BaseFilterImplementation):
  """Does field name access on the lowercase version of names.

  Useful to only access attributes and properties with Google's python naming
  style.
  """

  # A copy of the base filters in which the value expander is replaced by the
  # plaso specific one.
  FILTERS = dict(objectfilter.BaseFilterImplementation.FILTERS)
  FILTERS['ValueExpander'] = PlasoValueExpander

  # NOTE: OPS is the very same dict object as objectfilter.OP2FN, not a copy,
  # hence the 'inlist' operator is also added to objectfilter.OP2FN.
  OPS = objectfilter.OP2FN
  OPS['inlist'] = ParserList
class DateCompareObject(object):
  """A specific class created for date comparison.

  This object takes a date representation, whether that is a direct integer
  datetime object or a string presenting the date, and uses that for comparing
  against timestamps stored in microseconds since Jan 1, 1970 00:00:00 UTC.

  This makes it possible to use regular comparison operators for date,
  irrelevant of the format the date comes in, since plaso stores all timestamps
  in the same format, which is an integer/long, it is a simple manner of
  changing the input into the same format (int) and compare that.
  """

  def __init__(self, data):
    """Take a date object and use that for comparison.

    Args:
      data: A string, datetime object, an integer or another
            DateCompareObject that represents the time to compare against.
            Time should be stored as microseconds since UTC in Epoch format.

    Raises:
      ValueError: if the date string is invalid or the type of data is
                  not supported.
    """
    self.text = utils.GetUnicodeString(data)
    if type(data) in (int, long):
      self.data = data
    elif type(data) == float:
      self.data = long(data)
    elif type(data) in (str, unicode):
      try:
        self.data = timelib.Timestamp.FromTimeString(
            utils.GetUnicodeString(data))
      except ValueError as exception:
        raise ValueError(u'Wrongly formatted date string: {0:s} - {1:s}'.format(
            data, exception))
    elif type(data) == datetime.datetime:
      self.data = timelib.Timestamp.FromPythonDatetime(data)
    # isinstance() takes the object first and the class second; with the
    # arguments reversed this branch raised a TypeError instead of copying
    # the timestamp of the other DateCompareObject.
    elif isinstance(data, DateCompareObject):
      self.data = data.data
    else:
      raise ValueError(u'Unsupported type: {0:s}.'.format(type(data)))

  def __cmp__(self, x):
    """A simple comparison operation.

    Args:
      x: the value to compare against, which is first converted into
         a DateCompareObject.

    Returns:
      The result of cmp() on both timestamps, or False (which equals zero)
      if x cannot be converted.
    """
    try:
      x_date = DateCompareObject(x)
      return cmp(self.data, x_date.data)
    except ValueError:
      return False

  def __le__(self, x):
    """Less or equal comparison."""
    return self.data <= x

  def __ge__(self, x):
    """Greater or equal comparison."""
    return self.data >= x

  def __eq__(self, x):
    """Check if equal."""
    return x == self.data

  def __ne__(self, x):
    """Check if not equal."""
    return x != self.data

  def __str__(self):
    """Return a string representation of the object."""
    return self.text
class BaseParser(objectfilter.Parser):
  """Plaso version of the Parser.

  The only difference with the objectfilter parser is that PlasoExpression
  is set as the expression class.
  """

  expression_cls = PlasoExpression
class TrueObject(object):
  """An object for which every comparison evaluates to True.

  Filter queries can be partially evaluated with this object: since every
  comparison against it succeeds, only the remaining portions of a query
  decide the outcome.
  """

  def __init__(self, txt=''):
    """Stores the text that is used when the object is converted to text."""
    self.txt = txt

  def __getattr__(self, unused_attr):
    """Returns the object itself for any attribute that is not set."""
    return self

  def __eq__(self, unused_x):
    """Equal: always True."""
    return True

  def __gt__(self, unused_x):
    """Greater than: always True."""
    return True

  def __ge__(self, unused_x):
    """Greater than or equal: always True."""
    return True

  def __lt__(self, unused_x):
    """Less than: always True."""
    return True

  def __le__(self, unused_x):
    """Less than or equal: always True."""
    return True

  def __ne__(self, unused_x):
    """Not equal: always True."""
    return True

  def __iter__(self):
    """Yields the object itself, so the "in" keyword can be tested."""
    yield self

  def __str__(self):
    """Returns text against which regular expression searches can be done.

    Returns:
      A string that contains the original query followed by, for every
      supported escape sequence found in it, a copy of the query in which
      that escape sequence is expanded.
    """
    # Regular expressions in pfilter may include the following escapes:
    # "\\'\"rnbt\.ws":
    expansions = (
        (r'\.', ' _ text _ '),
        (r'\b', ' '),
        (r'\s', ' '))

    expanded = self.txt
    for escape, replacement in expansions:
      if escape in self.txt:
        expanded += self.txt.replace(escape, replacement)

    return expanded
class MockTestFilter(object):
  """A mock filter object used to test isolated portions of a query.

  Only the attributes passed to the constructor have a real value, every
  other attribute yields an object for which all comparisons are True. That
  way a single attribute can be evaluated on its own, for instance to decide
  whether or not to include a parser at all before actually running the
  tool. The same applies to filtering out certain filenames, etc.
  """

  def __init__(self, query, **kwargs):
    """Stores the query text and the attributes that have a real value.

    Args:
      query: the text of the filter query.
      kwargs: the attribute names and values to answer with.
    """
    self.txt = query
    self.attributes = kwargs

  def __getattr__(self, attr):
    """Returns the stored value, or a TrueObject for anything else."""
    stored = self.attributes
    if attr not in stored:
      # TODO: Either delete this entire object (MockTestFilter) or implement
      # a false object and return the correct one depending on whether we
      # are looking for a true or negative response (eg "not" keyword
      # included).
      return TrueObject(self.txt)

    return stored.get(attr, None)
class TimeRangeCache(object):
  """A class that stores timeranges from filters.

  The bounds are kept as attributes on the class itself, hence they are
  shared by all users of the class.
  """

  @classmethod
  def ResetTimeConstraints(cls):
    """Removes the lower and upper bound, if they have been set."""
    for bound_name in ('_lower', '_upper'):
      if hasattr(cls, bound_name):
        delattr(cls, bound_name)

  @classmethod
  def SetLowerTimestamp(cls, timestamp):
    """Sets the lower bound, unless a smaller or equal one is stored.

    Args:
      timestamp: the candidate for the lower bound.
    """
    not_set = object()
    current = getattr(cls, '_lower', not_set)
    if current is not_set or timestamp < current:
      cls._lower = timestamp

  @classmethod
  def SetUpperTimestamp(cls, timestamp):
    """Sets the upper bound, unless a larger or equal one is stored.

    Args:
      timestamp: the candidate for the upper bound.
    """
    not_set = object()
    current = getattr(cls, '_upper', not_set)
    if current is not_set or timestamp > current:
      cls._upper = timestamp

  @classmethod
  def GetTimeRange(cls):
    """Returns the bounds of the filter range, smallest value first.

    Returns:
      A tuple of two timestamps. A missing lower bound defaults to 0 and
      a missing upper bound to limit.MAX_INT64.
    """
    first = getattr(cls, '_lower', 0)
    last = getattr(cls, '_upper', limit.MAX_INT64)

    # The bounds are swapped in case they were stored the wrong way around.
    return (first, last) if first < last else (last, first)
def GetMatcher(query, quiet=False):
  """Parses and compiles a filter query into a matcher object.

  Args:
    query: the filter query string.
    quiet: optional boolean; when True a malformed query is not logged.

  Returns:
    The compiled filter object, or None if the query raised a ParseError.
  """
  try:
    parsed_query = BaseParser(query).Parse()
    return parsed_query.Compile(PlasoAttributeFilterImplementation)
  except objectfilter.ParseError as exception:
    if not quiet:
      logging.error(u'Filter <{0:s}> malformed: {1:s}'.format(
          query, exception))

  return None
+238
View File
@@ -0,0 +1,238 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the filters."""
import unittest
from plaso.formatters import interface as formatters_interface
from plaso.lib import event
from plaso.lib import objectfilter
from plaso.lib import pfilter
from plaso.lib import timelib_test
from plaso.parsers import interface as parsers_interface
import pytz
class Empty(object):
  """An empty object, used as a plain container for attributes."""
class PfilterFakeFormatter(formatters_interface.EventFormatter):
  """A formatter for this fake class."""

  # The same data type as the one set by PfilterFakeParser.
  DATA_TYPE = 'Weirdo:Made up Source:Last Written'

  # The message strings are taken directly from the text attributes.
  FORMAT_STRING = '{text}'
  FORMAT_STRING_SHORT = '{text_short}'

  SOURCE_LONG = 'Fake Parsing Source'
  SOURCE_SHORT = 'REG'
class PfilterFakeParser(parsers_interface.BaseParser):
  """A fake parser that does not parse anything, but registers."""

  NAME = 'pfilter_fake_parser'
  DATA_TYPE = 'Weirdo:Made up Source:Last Written'

  def Parse(self, unused_parser_context, unused_file_entry):
    """Extract data from a fake plist file for testing.

    Neither of the arguments is used; a single event object with hard coded
    values is generated instead.

    Args:
      unused_parser_context: A parser context object (instance of
                             ParserContext).
      unused_file_entry: A file entry object (instance of dfvfs.FileEntry).

    Yields:
      An event object (instance of EventObject) that contains the parsed
      attributes.
    """
    event_object = event.EventObject()
    event_object.timestamp = timelib_test.CopyStringToTimestamp(
        '2015-11-18 01:15:43')
    event_object.timestamp_desc = 'Last Written'
    event_object.text_short = 'This description is different than the long one.'
    event_object.text = (
        u'User did a very bad thing, bad, bad thing that awoke Dr. Evil.')
    event_object.filename = (
        u'/My Documents/goodfella/Documents/Hideout/myfile.txt')
    event_object.hostname = 'Agrabah'
    event_object.parser = 'Weirdo'
    event_object.inode = 1245
    event_object.display_name = u'unknown:{0:s}'.format(event_object.filename)
    # Subclasses override DATA_TYPE, hence it is read from the instance.
    event_object.data_type = self.DATA_TYPE

    yield event_object
class PfilterAnotherParser(PfilterFakeParser):
  """Another fake parser that does nothing but register as a parser.

  Only the name and the data type differ from PfilterFakeParser.
  """

  NAME = 'pfilter_another_fake'
  DATA_TYPE = 'Weirdo:AnotherFakeSource'
class PfilterAnotherFakeFormatter(PfilterFakeFormatter):
  """Formatter for the AnotherParser event."""

  # The same data type as the one set by PfilterAnotherParser.
  DATA_TYPE = 'Weirdo:AnotherFakeSource'
  SOURCE_LONG = 'Another Fake Source'
class PfilterAllEvilParser(PfilterFakeParser):
  """A class that does nothing but has a fancy name.

  Only the name and the data type differ from PfilterFakeParser.
  """

  NAME = 'pfilter_evil_fake_parser'
  DATA_TYPE = 'Weirdo:AllEvil'
class PfilterEvilFormatter(PfilterFakeFormatter):
  """Formatter for the AllEvilParser."""

  # The same data type as the one set by PfilterAllEvilParser.
  DATA_TYPE = 'Weirdo:AllEvil'
  SOURCE_LONG = 'A Truly Evil'
class PFilterTest(unittest.TestCase):
  """Simple plaso specific tests to the pfilter implementation."""

  def setUp(self):
    """Set up the necessary variables used in tests."""
    self._pre = Empty()
    self._pre.zone = pytz.UTC

  def testPlasoEvents(self):
    """Test plaso EventObjects, both Python and Protobuf version.

    These are more plaso specific tests than the more generic
    objectfilter ones. It will create an EventObject that stores
    some attributes. These objects will then be serialized into an
    EventObject protobuf and all tests run against both the native
    Python object as well as the protobuf.
    """
    # NOTE(review): only the native Python event object is exercised in this
    # method; no protobuf serialization takes place here.
    event_object = event.EventObject()
    event_object.data_type = 'Weirdo:Made up Source:Last Written'
    event_object.timestamp = timelib_test.CopyStringToTimestamp(
        '2015-11-18 01:15:43')
    event_object.timestamp_desc = 'Last Written'
    event_object.text_short = 'This description is different than the long one.'
    event_object.text = (
        u'User did a very bad thing, bad, bad thing that awoke Dr. Evil.')
    event_object.filename = (
        u'/My Documents/goodfella/Documents/Hideout/myfile.txt')
    event_object.hostname = 'Agrabah'
    event_object.parser = 'Weirdo'
    event_object.inode = 1245
    event_object.mydict = {
        'value': 134, 'another': 'value', 'A Key (with stuff)': 'Here'}
    event_object.display_name = u'unknown:{0:s}'.format(event_object.filename)

    # Series of tests.
    query = 'filename contains \'GoodFella\''
    self.RunPlasoTest(event_object, query, True)

    # A double negation is not accepted by the parser and is expected to
    # raise a parse error.
    query = 'filename not not contains \'GoodFella\''
    my_parser = pfilter.BaseParser(query)
    self.assertRaises(
        objectfilter.ParseError,
        my_parser.Parse)

    # Test date filtering.
    query = 'date >= \'2015-11-18\''
    self.RunPlasoTest(event_object, query, True)

    query = 'date < \'2015-11-19\''
    self.RunPlasoTest(event_object, query, True)

    # 2015-11-18T01:15:43
    query = (
        'date < \'2015-11-18T01:15:44.341\' and date > \'2015-11-18 01:15:42\'')
    self.RunPlasoTest(event_object, query, True)

    query = 'date > \'2015-11-19\''
    self.RunPlasoTest(event_object, query, False)

    # Perform few attribute tests.
    query = 'filename not contains \'sometext\''
    self.RunPlasoTest(event_object, query, True)

    query = (
        'timestamp_desc CONTAINS \'written\' AND date > \'2015-11-18\' AND '
        'date < \'2015-11-25 12:56:21\' AND (source_short contains \'LOG\' or '
        'source_short CONTAINS \'REG\')')
    self.RunPlasoTest(event_object, query, True)

    query = 'parser is not \'Made\''
    self.RunPlasoTest(event_object, query, True)

    query = 'parser is not \'Weirdo\''
    self.RunPlasoTest(event_object, query, False)

    # Tests against the values stored in the dictionary attribute.
    query = 'mydict.value is 123'
    self.RunPlasoTest(event_object, query, False)

    # The key 'A Key (with stuff)' is addressed by its stripped form.
    query = 'mydict.akeywithstuff contains "ere"'
    self.RunPlasoTest(event_object, query, True)

    query = 'mydict.value is 134'
    self.RunPlasoTest(event_object, query, True)

    query = 'mydict.value < 200'
    self.RunPlasoTest(event_object, query, True)

    query = 'mydict.another contains "val"'
    self.RunPlasoTest(event_object, query, True)

    query = 'mydict.notthere is 123'
    self.RunPlasoTest(event_object, query, False)

    query = 'source_long not contains \'Fake\''
    self.RunPlasoTest(event_object, query, False)

    query = 'source is \'REG\''
    self.RunPlasoTest(event_object, query, True)

    query = 'source is not \'FILE\''
    self.RunPlasoTest(event_object, query, True)

    # Multiple attributes.
    query = (
        'source_long is \'Fake Parsing Source\' AND description_long '
        'regexp \'bad, bad thing [\\sa-zA-Z\\.]+ evil\'')
    self.RunPlasoTest(event_object, query, False)

    query = (
        'source_long is \'Fake Parsing Source\' AND text iregexp '
        '\'bad, bad thing [\\sa-zA-Z\\.]+ evil\'')
    self.RunPlasoTest(event_object, query, True)

  def RunPlasoTest(self, obj, query, result):
    """Run a simple test against an event object.

    Args:
      obj: the event object to match.
      query: the filter query string.
      result: the expected outcome of matching the query against the object.
    """
    my_parser = pfilter.BaseParser(query).Parse()
    matcher = my_parser.Compile(
        pfilter.PlasoAttributeFilterImplementation)

    self.assertEqual(result, matcher.Matches(obj))
# Run the unit tests when this file is executed as a script.
if __name__ == "__main__":
  unittest.main()
+130
View File
@@ -0,0 +1,130 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a proxy object that can be used to provide RPC access."""
import abc
def GetProxyPortNumberFromPID(process_id):
  """Derives a port number from a process identifier (PID).

  Args:
    process_id: An integer, process ID (PID), value that should be used to
                find a port number.

  Returns:
    An integer indicating a possible port number for the process to listen on.
  """
  # TODO: Improve this method of selecting ports.
  # This is in no way a perfect algorithm for choosing port numbers (what if
  # the port is already assigned?, etc.)
  port_number = process_id

  # Values above the highest port number are reduced to their remainder,
  # which in turn can be lower than 1024 and is therefore checked below.
  if port_number > 65535:
    port_number %= 65535

  # Values below 1024 are moved up by 1024.
  if port_number < 1024:
    port_number += 1024

  return port_number
class ProxyServer(object):
  """An interface defining functions needed for a proxy object.

  The object can be used in a "with" statement, in which case Close() is
  called when the block is left.
  """
  # NOTE(review): the class derives from object and does not declare
  # abc.ABCMeta as its metaclass in this block, so the abc.abstractmethod
  # decorators below are presumably not enforced on instantiation - confirm.

  def __init__(self, port=0):
    """Initialize the proxy object.

    Args:
      port: An integer indicating the port number the proxy listens to.
            This is optional and defaults to port zero.
    """
    super(ProxyServer, self).__init__()
    self._port_number = port

  def __enter__(self):
    """Make usable with "with" statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.Close()

  @property
  def listening_port(self):
    """Returns back the port the proxy listens to."""
    return self._port_number

  @abc.abstractmethod
  def Close(self):
    """Close the proxy server."""

  @abc.abstractmethod
  def Open(self):
    """Sets up the necessary objects in order for the proxy to be started."""

  @abc.abstractmethod
  def RegisterFunction(self, function_name, function):
    """Register a function for this proxy.

    Args:
      function_name: The name of the registered proxy function.
      function: The callback for the function providing the answer.
    """

  @abc.abstractmethod
  def StartProxy(self):
    """Start the proxy.

    This usually involves setting up the proxy to bind to an address and
    listen to requests.
    """

  @abc.abstractmethod
  def SetListeningPort(self, new_port_number):
    """Change the port the proxy listens to.

    Args:
      new_port_number: The new port number.
    """
class ProxyClient(object):
  """An interface defining functions needed to implement a proxy client."""
  # NOTE(review): the class derives from object and does not declare
  # abc.ABCMeta as its metaclass in this block, so the abc.abstractmethod
  # decorators below are presumably not enforced on instantiation - confirm.

  def __init__(self, port=0):
    """Initialize the proxy client.

    Args:
      port: An integer indicating the port number the proxy connects to.
            This is optional and defaults to port zero.
    """
    super(ProxyClient, self).__init__()
    self._port_number = port

  @abc.abstractmethod
  def Open(self):
    """Sets up the necessary objects in order for the proxy to be started."""

  @abc.abstractmethod
  def GetData(self, call_back_name):
    """Return data extracted from a RPC callback.

    Args:
      call_back_name: The name of the call back function or attribute registered
                      in the RPC service.

    Returns:
      The data returned by the RPC server.
    """
+58
View File
@@ -0,0 +1,58 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains few methods for Plaso."""
import logging
from plaso.lib import output
# TODO: Refactor the putils library so it does not end up being a trash can
# for all things core/front-end. We don't want this to end up being a
# collection for all methods that have no other home.
class Options(object):
  """A simple configuration object, used as a container for attributes."""
def _FindClasses(class_object, *args):
"""Find all registered classes.
A method to find all registered classes of a particular
class.
Args:
class_object: The parent class.
Returns:
A list of registered classes of that class.
"""
results = []
for cls in class_object.classes:
try:
results.append(class_object.classes[cls](*args))
except Exception:
logging.error(
u'_FindClasses: exception while appending: {0:s}'.format(cls))
raise
return results
def FindAllOutputs():
  """Find all available output modules.

  Returns:
    A list with an instance of every class registered with
    output.LogOutputFormatter, each created with None as its only argument.
  """
  return _FindClasses(output.LogOutputFormatter, None)
+80
View File
@@ -0,0 +1,80 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a class registration system for plugins."""
import abc
class MetaclassRegistry(abc.ABCMeta):
  """Automatic Plugin Registration through metaclasses."""

  def __init__(cls, name, bases, env_dict):
    """Initialize a metaclass.

    Args:
      name: The interface class name.
      bases: A tuple of base names.
      env_dict: The namespace of the object.

    Raises:
      KeyError: If a classes given name is already registered, to make sure
                no two classes that inherit from the same interface can have
                the same name attribute.
    """
    abc.ABCMeta.__init__(cls, name, bases, env_dict)

    # Register the name of the immediate parent class.
    if bases:
      cls.parent_class_name = getattr(bases[0], 'NAME', bases[0])
      cls.parent_class = bases[0]

    # Attach the classes dict to the baseclass and have all derived classes
    # use the same one:
    for base in bases:
      try:
        cls.classes = base.classes
        cls.plugin_feature = base.plugin_feature
        cls.top_level_class = base.top_level_class
        break
      except AttributeError:
        # The base is not part of a registry, hence start a new one with
        # this class at the top. A later base that does have a registry
        # overwrites these values again.
        cls.classes = {}
        cls.plugin_feature = cls.__name__
        # Keep a reference to the top level class
        cls.top_level_class = cls

    # The following should not be registered as they are abstract. Classes
    # are abstract if they have the __abstract attribute (note this is not
    # inheritable so each abstract class must be explicitly marked).
    abstract_attribute = '_{0:s}__abstract'.format(name)
    if getattr(cls, abstract_attribute, None):
      return

    # Classes with a name that starts with "Abstract" are not registered
    # either.
    if not cls.__name__.startswith('Abstract'):
      # The NAME attribute takes precedence over the name of the class.
      cls_name = getattr(cls, 'NAME', cls.__name__)

      if cls_name in cls.classes:
        raise KeyError(u'Class: {0:s} already registered. [{1:s}]'.format(
            cls_name, repr(cls)))

      cls.classes[cls_name] = cls

      try:
        if cls.top_level_class.include_plugins_as_attributes:
          setattr(cls.top_level_class, cls.__name__, cls)
      except AttributeError:
        # The top level class does not define include_plugins_as_attributes.
        pass
+1564
View File
File diff suppressed because it is too large Load Diff
+340
View File
@@ -0,0 +1,340 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the tests for the event storage."""
import os
import tempfile
import shutil
import unittest
import zipfile
from plaso.engine import queue
from plaso.events import text_events
from plaso.events import windows_events
from plaso.formatters import manager as formatters_manager
from plaso.lib import event
from plaso.lib import eventdata
from plaso.lib import pfilter
from plaso.lib import storage
from plaso.lib import timelib_test
from plaso.multi_processing import multi_process
from plaso.formatters import winreg # pylint: disable=unused-import
from plaso.serializer import protobuf_serializer
class DummyObject(object):
  """Dummy object, used as a plain container for attributes."""
class GroupMock(object):
  """Mock a class for grouping events together."""

  def __init__(self):
    """Starts out without any groups."""
    self.groups = []

  def AddGroup(self, name, events, desc=None, first=0, last=0, color=None,
               cat=None):
    """Stores the values of a new group of events as a tuple."""
    group_values = (name, events, desc, first, last, color, cat)
    self.groups.append(group_values)

  def __iter__(self):
    """Yields an object for every group that has been added.

    The name and events attributes are always set, the other attributes
    only if their value evaluates to True.
    """
    for name, events, desc, first, last, color, cat in self.groups:
      group = DummyObject()
      group.name = name
      group.events = events

      optional_attributes = [('description', desc)]
      # The timestamps are converted into integers.
      if first:
        optional_attributes.append(('first_timestamp', int(first)))
      if last:
        optional_attributes.append(('last_timestamp', int(last)))
      optional_attributes.append(('color', color))
      optional_attributes.append(('category', cat))

      for attribute_name, attribute_value in optional_attributes:
        if attribute_value:
          setattr(group, attribute_name, attribute_value)

      yield group
class TempDirectory(object):
  """A temporary directory that removes itself at the end of a with block."""

  def __init__(self):
    """Initializes the object; the directory itself is not created yet."""
    super(TempDirectory, self).__init__()
    self.name = u''

  def __enter__(self):
    """Creates the directory and returns its path."""
    directory_path = tempfile.mkdtemp()
    self.name = directory_path
    return directory_path

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Removes the directory and its contents, ignoring any errors."""
    shutil.rmtree(self.name, ignore_errors=True)
class StorageFileTest(unittest.TestCase):
"""Tests for the plaso storage file."""
def setUp(self):
  """Sets up the needed objects used throughout the test.

  Creates three Windows Registry events and one text event, sets the parser
  of every event to 'UNKNOWN' and stores them in self._event_objects.
  """
  self._event_objects = []

  # TODO: replace hardcoded timestamps by timelib_test.CopyStringToTimestamp.
  event_1 = windows_events.WindowsRegistryEvent(
      13349615269295969, u'MY AutoRun key', {u'Value': u'c:/Temp/evil.exe'})
  event_1.parser = 'UNKNOWN'

  event_2 = windows_events.WindowsRegistryEvent(
      13359662069295961, u'\\HKCU\\Secret\\EvilEmpire\\Malicious_key',
      {u'Value': u'send all the exes to the other world'})
  event_2.parser = 'UNKNOWN'

  event_3 = windows_events.WindowsRegistryEvent(
      13349402860000000, u'\\HKCU\\Windows\\Normal',
      {u'Value': u'run all the benign stuff'})
  event_3.parser = 'UNKNOWN'

  text_dict = {'text': (
      'This is a line by someone not reading the log line properly. And '
      'since this log line exceeds the accepted 80 chars it will be '
      'shortened.'), 'hostname': 'nomachine', 'username': 'johndoe'}
  event_4 = text_events.TextEvent(12389344590000000, 12, text_dict)
  event_4.parser = 'UNKNOWN'

  self._event_objects.append(event_1)
  self._event_objects.append(event_2)
  self._event_objects.append(event_3)
  self._event_objects.append(event_4)
def testStorageWriter(self):
"""Test the storage writer."""
self.assertEquals(len(self._event_objects), 4)
# The storage writer is normally run in a separate thread.
# For the purpose of this test it has to be run in sequence,
# hence the call to WriteEventObjects after all the event objects
# have been queued up.
# TODO: add upper queue limit.
test_queue = multi_process.MultiProcessingQueue()
test_queue_producer = queue.ItemQueueProducer(test_queue)
test_queue_producer.ProduceItems(self._event_objects)
test_queue_producer.SignalEndOfInput()
with tempfile.NamedTemporaryFile() as temp_file:
storage_writer = storage.StorageFileWriter(test_queue, temp_file)
storage_writer.WriteEventObjects()
z_file = zipfile.ZipFile(temp_file, 'r', zipfile.ZIP_DEFLATED)
expected_z_filename_list = [
'plaso_index.000001', 'plaso_meta.000001', 'plaso_proto.000001',
'plaso_timestamps.000001', 'serializer.txt']
z_filename_list = sorted(z_file.namelist())
self.assertEquals(len(z_filename_list), 5)
self.assertEquals(z_filename_list, expected_z_filename_list)
def testStorage(self):
"""Test the storage object."""
event_objects = []
timestamps = []
group_mock = GroupMock()
tags = []
tags_mock = []
groups = []
group_events = []
same_events = []
serializer = protobuf_serializer.ProtobufEventObjectSerializer
with TempDirectory() as dirname:
temp_file = os.path.join(dirname, 'plaso.db')
store = storage.StorageFile(temp_file)
store.AddEventObjects(self._event_objects)
# Add tagging.
tag_1 = event.EventTag()
tag_1.store_index = 0
tag_1.store_number = 1
tag_1.comment = 'My comment'
tag_1.color = 'blue'
tags_mock.append(tag_1)
tag_2 = event.EventTag()
tag_2.store_index = 1
tag_2.store_number = 1
tag_2.tags = ['Malware']
tag_2.color = 'red'
tags_mock.append(tag_2)
tag_3 = event.EventTag()
tag_3.store_number = 1
tag_3.store_index = 2
tag_3.comment = 'This is interesting'
tag_3.tags = ['Malware', 'Benign']
tag_3.color = 'red'
tags_mock.append(tag_3)
store.StoreTagging(tags_mock)
# Add additional tagging, second round.
tag_4 = event.EventTag()
tag_4.store_index = 1
tag_4.store_number = 1
tag_4.tags = ['Interesting']
store.StoreTagging([tag_4])
group_mock.AddGroup(
'Malicious', [(1, 1), (1, 2)], desc='Events that are malicious',
color='red', first=13349402860000000, last=13349615269295969,
cat='Malware')
store.StoreGrouping(group_mock)
store.Close()
read_store = storage.StorageFile(temp_file, read_only=True)
self.assertTrue(read_store.HasTagging())
self.assertTrue(read_store.HasGrouping())
for event_object in read_store.GetEntries(1):
event_objects.append(event_object)
timestamps.append(event_object.timestamp)
if event_object.data_type == 'windows:registry:key_value':
self.assertEquals(event_object.timestamp_desc,
eventdata.EventTimestamp.WRITTEN_TIME)
else:
self.assertEquals(event_object.timestamp_desc,
eventdata.EventTimestamp.WRITTEN_TIME)
for tag in read_store.GetTagging():
event_object = read_store.GetTaggedEvent(tag)
tags.append(event_object)
groups = list(read_store.GetGrouping())
self.assertEquals(len(groups), 1)
group_events = list(read_store.GetEventsFromGroup(groups[0]))
# Read the same events that were put in the group, just to compare
# against.
event_object = read_store.GetEventObject(1, 1)
serialized_event_object = serializer.WriteSerialized(event_object)
same_events.append(serialized_event_object)
event_object = read_store.GetEventObject(1, 2)
serialized_event_object = serializer.WriteSerialized(event_object)
same_events.append(serialized_event_object)
self.assertEquals(len(event_objects), 4)
self.assertEquals(len(tags), 4)
self.assertEquals(tags[0].timestamp, 12389344590000000)
self.assertEquals(tags[0].store_number, 1)
self.assertEquals(tags[0].store_index, 0)
self.assertEquals(tags[0].tag.comment, u'My comment')
self.assertEquals(tags[0].tag.color, u'blue')
msg, _ = formatters_manager.EventFormatterManager.GetMessageStrings(tags[0])
self.assertEquals(msg[0:10], u'This is a ')
self.assertEquals(tags[1].tag.tags[0], 'Malware')
msg, _ = formatters_manager.EventFormatterManager.GetMessageStrings(tags[1])
self.assertEquals(msg[0:15], u'[\\HKCU\\Windows\\')
self.assertEquals(tags[2].tag.comment, u'This is interesting')
self.assertEquals(tags[2].tag.tags[0], 'Malware')
self.assertEquals(tags[2].tag.tags[1], 'Benign')
self.assertEquals(tags[2].parser, 'UNKNOWN')
# Test the newly added fourth tag, which should include data from
# the first version as well.
self.assertEquals(tags[3].tag.tags[0], 'Interesting')
self.assertEquals(tags[3].tag.tags[1], 'Malware')
expected_timestamps = [
12389344590000000, 13349402860000000, 13349615269295969,
13359662069295961]
self.assertEquals(timestamps, expected_timestamps)
self.assertEquals(groups[0].name, u'Malicious')
self.assertEquals(groups[0].category, u'Malware')
self.assertEquals(groups[0].color, u'red')
self.assertEquals(groups[0].description, u'Events that are malicious')
self.assertEquals(groups[0].first_timestamp, 13349402860000000)
self.assertEquals(groups[0].last_timestamp, 13349615269295969)
self.assertEquals(len(group_events), 2)
self.assertEquals(group_events[0].timestamp, 13349402860000000)
self.assertEquals(group_events[1].timestamp, 13349615269295969L)
proto_group_events = []
for group_event in group_events:
serialized_event_object = serializer.WriteSerialized(group_event)
proto_group_events.append(serialized_event_object)
self.assertEquals(same_events, proto_group_events)
class StoreStorageTest(unittest.TestCase):
  """Tests reading sorted entries from a storage file."""

  def setUp(self):
    """Setup sets parameters that will be reused throughout this test."""
    # TODO: have sample output generated from the test.
    # TODO: Use input data with a defined year. syslog parser chooses a
    # year based on system clock; forcing updates to test file if regenerated.
    self.test_file = os.path.join('test_data', 'psort_test.out')
    self.first = timelib_test.CopyStringToTimestamp('2012-07-20 15:44:14')
    self.last = timelib_test.CopyStringToTimestamp('2016-11-18 01:15:43')

  def testStorageSort(self):
    """This test ensures that items read and output are in the expected order.

    This method by design outputs data as it runs. In order to test this a
    modified output renderer is used for which the flush functionality has
    been removed.

    The test will be to read the TestEventBuffer storage and check to see
    if it matches the known good sort order.
    """
    # Restrict the entries returned to the time range defined in setUp.
    pfilter.TimeRangeCache.ResetTimeConstraints()
    pfilter.TimeRangeCache.SetUpperTimestamp(self.last)
    pfilter.TimeRangeCache.SetLowerTimestamp(self.first)

    store = storage.StorageFile(self.test_file, read_only=True)
    store.store_range = [1, 5, 6]

    # GetSortedEntry is called until it returns a value that evaluates to
    # False, which ends the collection.
    read_list = []
    event_object = store.GetSortedEntry()
    while event_object:
      read_list.append(event_object.timestamp)
      event_object = store.GetSortedEntry()

    # Plain integer literals are used since the "L" suffix is not valid
    # syntax in Python 3 and is redundant in Python 2.
    expected_timestamps = [
        1344270407000000, 1392438730000000, 1427151678000000,
        1451584472000000]

    self.assertEqual(read_list, expected_timestamps)
if __name__ == '__main__':
unittest.main()
+635
View File
@@ -0,0 +1,635 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains functions and variables used for time manipulations.
This file should contain common methods that can be used in Plaso to convert
timestamps in various formats into the standard micro seconds precision integer
Epoch UTC time that is used internally to store timestamps in Plaso.
The file can also contain common functions to change the default timestamp into
a more human readable one.
"""
import calendar
import datetime
import dateutil.parser
import logging
import time
import pytz
# Maps the lower case three letter English month abbreviation to the number
# of the month, where 1 represents January.
MONTH_DICT = dict(
    (month_name, month_number) for month_number, month_name in enumerate(
        ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'), 1))
class Timestamp(object):
  """Class for converting timestamps to plaso timestamps.

  The Plaso timestamp is a 64-bit signed timestamp value containing:
  micro seconds since 1970-01-01 00:00:00.

  The timestamp is not necessarily in UTC.
  """

  # Note that floor division (//) and plain integer literals are used below
  # so the values are identical integers in both Python 2 and Python 3.

  # The minimum timestamp in seconds
  TIMESTAMP_MIN_SECONDS = -(((1 << 63) - 1) // 1000000)

  # The maximum timestamp in seconds
  TIMESTAMP_MAX_SECONDS = ((1 << 63) - 1) // 1000000

  # The minimum timestamp in micro seconds
  TIMESTAMP_MIN_MICRO_SECONDS = -((1 << 63) - 1)

  # The maximum timestamp in micro seconds
  TIMESTAMP_MAX_MICRO_SECONDS = (1 << 63) - 1

  # The days per month of a non leap year
  DAYS_PER_MONTH = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

  # The number of seconds in a day
  SECONDS_PER_DAY = 24 * 60 * 60

  # The number of micro seconds per second
  MICRO_SECONDS_PER_SECOND = 1000000

  # The multiplication factor to change milli seconds to micro seconds.
  MILLI_SECONDS_TO_MICRO_SECONDS = 1000

  # The difference between Jan 1, 1980 and Jan 1, 1970 in seconds.
  FAT_DATE_TO_POSIX_BASE = 315532800

  # The difference between Jan 1, 1601 and Jan 1, 1970 in micro seconds
  WEBKIT_TIME_TO_POSIX_BASE = 11644473600 * 1000000

  # The difference between Jan 1, 1601 and Jan 1, 1970 in 100s of nanoseconds.
  FILETIME_TO_POSIX_BASE = 11644473600 * 10000000

  # The number of seconds between January 1, 1904 and Jan 1, 1970.
  # Value confirmed with sleuthkit:
  #  http://svn.sleuthkit.org/repos/sleuthkit/trunk/tsk3/fs/tsk_hfs.h
  # and linux source file linux/include/linux/hfsplus_fs.h
  HFSTIME_TO_POSIX_BASE = 2082844800

  # The number of seconds between January 1, 1970 and January 1, 2001.
  # As specified in:
  # https://developer.apple.com/library/ios/documentation/
  #       cocoa/Conceptual/DatesAndTimes/Articles/dtDates.html
  COCOA_TIME_TO_POSIX_BASE = 978307200

  # The difference between POSIX (Jan 1, 1970) and DELPHI (Dec 30, 1899).
  # http://docwiki.embarcadero.com/Libraries/XE3/en/System.TDateTime
  DELPHI_TIME_TO_POSIX_BASE = 25569

  @classmethod
  def CopyToDatetime(cls, timestamp, timezone, raise_error=False):
    """Copies the timestamp to a datetime object.

    Args:
      timestamp: An integer containing the timestamp.
      timezone: The timezone (pytz.timezone) object.
      raise_error: Boolean that if set to True will not absorb an OverflowError
                   if the timestamp is out of bounds. By default there will be
                   no error raised.

    Returns:
      A datetime object.

    Raises:
      OverflowError: If raise_error is set to True and an OverflowError error
                     occurs. Otherwise the error is absorbed and a datetime
                     object from the beginning of UNIX Epoch is returned.
    """
    datetime_object = datetime.datetime(1970, 1, 1, 0, 0, 0, 0, tzinfo=pytz.utc)
    try:
      datetime_object += datetime.timedelta(microseconds=timestamp)
      return datetime_object.astimezone(timezone)
    except OverflowError as exception:
      if raise_error:
        raise

      # The exception is formatted with !s since exception objects do not
      # support the "s" format specification on every Python version.
      logging.error((
          u'Unable to copy {0:d} to a datetime object with error: '
          u'{1!s}').format(timestamp, exception))

    return datetime_object

  @classmethod
  def CopyToIsoFormat(cls, timestamp, timezone=pytz.utc, raise_error=False):
    """Copies the timestamp to an ISO 8601 formatted string.

    Args:
      timestamp: An integer containing the timestamp.
      timezone: Optional timezone (instance of pytz.timezone).
                The default is UTC.
      raise_error: Boolean that if set to True will not absorb an OverflowError
                   if the timestamp is out of bounds. By default there will be
                   no error raised.

    Returns:
      A string containing an ISO 8601 formatted date and time.
    """
    datetime_object = cls.CopyToDatetime(
        timestamp, timezone, raise_error=raise_error)
    return datetime_object.isoformat()

  @classmethod
  def CopyToPosix(cls, timestamp):
    """Converts microsecond timestamps to POSIX timestamps.

    Args:
      timestamp: An integer containing the microsecond timestamp.

    Returns:
      An integer value containing the timestamp.
    """
    return timestamp // cls.MICRO_SECONDS_PER_SECOND

  @classmethod
  def DaysInMonth(cls, month, year):
    """Determines the days in a month for a specific year.

    Args:
      month: The month where 0 represents January.
      year: The year as in 1970.

    Returns:
      An integer containing the number of days in the month.

    Raises:
      ValueError: if the month value is invalid.
    """
    if month not in range(0, 12):
      raise ValueError(u'Invalid month value')

    days_per_month = cls.DAYS_PER_MONTH[month]
    # February has an additional day in a leap year.
    if month == 1 and cls.IsLeapYear(year):
      days_per_month += 1

    return days_per_month

  @classmethod
  def DaysInYear(cls, year):
    """Determines the days in a year.

    Args:
      year: The year as in 1970.

    Returns:
      An integer containing the number of days in the year.
    """
    days_in_year = 365
    if cls.IsLeapYear(year):
      return days_in_year + 1
    return days_in_year

  @classmethod
  def DayOfYear(cls, day, month, year):
    """Determines the day of the year for a specific day of a month in a year.

    Args:
      day: The day of the month where 0 represents the first day.
      month: The month where 0 represents January.
      year: The year as in 1970.

    Returns:
      An integer containing the day of year, where 0 represents the first
      day of the year.
    """
    day_of_year = day

    for past_month in range(0, month):
      day_of_year += cls.DaysInMonth(past_month, year)

    return day_of_year

  @classmethod
  def FromCocoaTime(cls, cocoa_time):
    """Converts a Cocoa time to a timestamp.

    In Cocoa, time and date values are stored in a unsigned 32-bit integer
    containing the number of seconds since January 1, 2001 at 00:00:00
    (midnight) UTC (GMT).

    Args:
      cocoa_time: The timestamp in Cocoa format.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    return cls.FromPosixTime(cocoa_time + cls.COCOA_TIME_TO_POSIX_BASE)

  @classmethod
  def FromDelphiTime(cls, delphi_time):
    """Converts a Delphi time to a timestamp.

    In Delphi, time and date values (TDateTime)
    are stored in a unsigned little endian 64-bit
    floating point containing the number of days
    since December 30, 1899 at 00:00:00 (midnight) Local Timezone.
    TDateTime does not have any time zone information.

    Args:
      delphi_time: The timestamp in Delphi format.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    # The Delphi time is a number of days, which is converted into a
    # floating point number of seconds relative to the POSIX epoch.
    posix_time = (
        (delphi_time - cls.DELPHI_TIME_TO_POSIX_BASE) *
        float(cls.SECONDS_PER_DAY))
    if (posix_time < cls.TIMESTAMP_MIN_SECONDS or
        posix_time > cls.TIMESTAMP_MAX_SECONDS):
      return 0

    return cls.FromPosixTime(int(posix_time))

  @classmethod
  def FromFatDateTime(cls, fat_date_time):
    """Converts a FAT date and time into a timestamp.

    FAT date time is mainly used in DOS/Windows file formats and FAT.

    The FAT date and time is a 32-bit value containing two 16-bit values:
      * The date (lower 16-bit).
        * bits 0 - 4:  day of month, where 1 represents the first day
        * bits 5 - 8:  month of year, where 1 represent January
        * bits 9 - 15: year since 1980
      * The time of day (upper 16-bit).
        * bits 0 - 4: seconds (in 2 second intervals)
        * bits 5 - 10: minutes
        * bits 11 - 15: hours

    Args:
      fat_date_time: The 32-bit FAT date time.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    number_of_seconds = cls.FAT_DATE_TO_POSIX_BASE

    # The day of month and month are converted to 0-based values.
    day_of_month = (fat_date_time & 0x1f) - 1
    month = ((fat_date_time >> 5) & 0x0f) - 1
    year = (fat_date_time >> 9) & 0x7f

    if day_of_month < 0 or day_of_month > 30 or month < 0 or month > 11:
      return 0

    number_of_days = cls.DayOfYear(day_of_month, month, 1980 + year)
    # The year value is relative to 1980 while the leap year rules apply
    # to the calendar year, hence the calendar year is passed on.
    for past_year in range(1980, 1980 + year):
      number_of_days += cls.DaysInYear(past_year)

    fat_date_time >>= 16

    seconds = (fat_date_time & 0x1f) * 2
    minutes = (fat_date_time >> 5) & 0x3f
    hours = (fat_date_time >> 11) & 0x1f

    if hours > 23 or minutes > 59 or seconds > 59:
      return 0

    number_of_seconds += (((hours * 60) + minutes) * 60) + seconds
    number_of_seconds += number_of_days * cls.SECONDS_PER_DAY

    return number_of_seconds * cls.MICRO_SECONDS_PER_SECOND

  @classmethod
  def FromFiletime(cls, filetime):
    """Converts a FILETIME into a timestamp.

    FILETIME is mainly used in Windows file formats and NTFS.

    The FILETIME is a 64-bit value containing:
      100th nano seconds since 1601-01-01 00:00:00

    Technically FILETIME consists of 2 x 32-bit parts and is presumed
    to be unsigned.

    Args:
      filetime: The 64-bit FILETIME timestamp.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    # TODO: Add a handling for if the timestamp equals to zero.
    if filetime < 0:
      return 0

    # Floor division keeps the result an integer number of micro seconds.
    timestamp = (filetime - cls.FILETIME_TO_POSIX_BASE) // 10

    if timestamp > cls.TIMESTAMP_MAX_MICRO_SECONDS:
      return 0
    return timestamp

  @classmethod
  def FromHfsTime(cls, hfs_time, timezone=pytz.utc, is_dst=False):
    """Converts a HFS time to a timestamp.

    HFS time is the same as HFS+ time, except stored in the local
    timezone of the user.

    Args:
      hfs_time: Timestamp in the hfs format (32 bit unsigned int).
      timezone: The timezone object of the system's local time.
      is_dst: A boolean to indicate the timestamp is corrected for daylight
              savings time (DST) only used for the DST transition period.
              The default is false.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    timestamp_local = cls.FromHfsPlusTime(hfs_time)
    return cls.LocaltimeToUTC(timestamp_local, timezone, is_dst)

  @classmethod
  def FromHfsPlusTime(cls, hfs_time):
    """Converts a HFS+ time to a timestamp.

    In HFS+ date and time values are stored in an unsigned 32-bit integer
    containing the number of seconds since January 1, 1904 at 00:00:00
    (midnight) UTC (GMT).

    Args:
      hfs_time: The timestamp in HFS+ format.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    return cls.FromPosixTime(hfs_time - cls.HFSTIME_TO_POSIX_BASE)

  @classmethod
  def FromJavaTime(cls, java_time):
    """Converts a Java time to a timestamp.

    Java time is the number of milliseconds since
    January 1, 1970, 00:00:00 UTC.

    URL: http://docs.oracle.com/javase/7/docs/api/
         java/sql/Timestamp.html#getTime%28%29

    Args:
      java_time: The Java Timestamp.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    return java_time * cls.MILLI_SECONDS_TO_MICRO_SECONDS

  @classmethod
  def FromPosixTime(cls, posix_time):
    """Converts a POSIX timestamp into a timestamp.

    The POSIX time is a signed 32-bit or 64-bit value containing:
      seconds since 1970-01-01 00:00:00

    Args:
      posix_time: The POSIX timestamp.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    if (posix_time < cls.TIMESTAMP_MIN_SECONDS or
        posix_time > cls.TIMESTAMP_MAX_SECONDS):
      return 0
    return int(posix_time) * cls.MICRO_SECONDS_PER_SECOND

  @classmethod
  def FromPosixTimeWithMicrosecond(cls, posix_time, microsecond):
    """Converts a POSIX timestamp with microsecond into a timestamp.

    The POSIX time is a signed 32-bit or 64-bit value containing:
      seconds since 1970-01-01 00:00:00

    Args:
      posix_time: The POSIX timestamp.
      microsecond: The microseconds to add to the timestamp.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    timestamp = cls.FromPosixTime(posix_time)
    # Note that a POSIX time that converts into 0 cannot be distinguished
    # from the error value, in which case the microseconds are not added.
    if not timestamp:
      return 0
    return timestamp + microsecond

  @classmethod
  def FromPythonDatetime(cls, datetime_object):
    """Converts a Python datetime object into a timestamp.

    Args:
      datetime_object: The datetime object (instance of datetime.datetime).

    Returns:
      An integer containing the timestamp or 0 if the object is not
      a datetime object.
    """
    if not isinstance(datetime_object, datetime.datetime):
      return 0

    posix_epoch = int(calendar.timegm(datetime_object.utctimetuple()))
    epoch = cls.FromPosixTime(posix_epoch)
    return epoch + datetime_object.microsecond

  @classmethod
  def FromTimeParts(
      cls, year, month, day, hour, minutes, seconds, microseconds=0,
      timezone=pytz.utc):
    """Converts a list of time entries to a timestamp.

    Args:
      year: An integer representing the year.
      month: An integer between 1 and 12.
      day: An integer representing the number of day in the month.
      hour: An integer representing the hour, 0 <= hour < 24.
      minutes: An integer, 0 <= minute < 60.
      seconds: An integer, 0 <= second < 60.
      microseconds: Optional number of microseconds ranging from:
                    0 <= microsecond < 1000000. The default is 0.
      timezone: Optional timezone (instance of pytz.timezone) or a string
                containing the name of the timezone. The default is UTC.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    try:
      date = datetime.datetime(
          year, month, day, hour, minutes, seconds, microseconds)
    except ValueError as exception:
      logging.warning((
          u'Unable to create timestamp from {0:04d}-{1:02d}-{2:02d} '
          u'{3:02d}:{4:02d}:{5:02d}.{6:06d} with error: {7!s}').format(
              year, month, day, hour, minutes, seconds, microseconds,
              exception))
      return 0

    # The timezone name can be either a byte or a Unicode string, where
    # type(u'') is the Unicode string type in both Python 2 and 3.
    if isinstance(timezone, (str, type(u''))):
      timezone = pytz.timezone(timezone)

    date_use = timezone.localize(date)
    epoch = int(calendar.timegm(date_use.utctimetuple()))

    return cls.FromPosixTime(epoch) + microseconds

  @classmethod
  def FromTimeString(
      cls, time_string, timezone=pytz.utc, dayfirst=False,
      gmt_as_timezone=True):
    """Converts a string containing a date and time value into a timestamp.

    Args:
      time_string: String that contains a date and time value.
      timezone: Optional timezone object (instance of pytz.timezone) that
                the data and time value in the string represents. This value
                is used when the timezone cannot be determined from the string.
      dayfirst: An optional boolean argument. If set to true then the
                parser will change the precedence in which it parses timestamps
                from MM-DD-YYYY to DD-MM-YYYY (and YYYY-MM-DD will be
                YYYY-DD-MM, etc).
      gmt_as_timezone: Sometimes the dateutil parser will interpret GMT and UTC
                       the same way, that is not make a distinction. By default
                       this is set to true, that is GMT can be interpreted
                       differently than UTC. If that is not the expected result
                       this attribute can be set to false.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    datetime_object = StringToDatetime(
        time_string, timezone=timezone, dayfirst=dayfirst,
        gmt_as_timezone=gmt_as_timezone)
    return cls.FromPythonDatetime(datetime_object)

  @classmethod
  def FromWebKitTime(cls, webkit_time):
    """Converts a WebKit time into a timestamp.

    The WebKit time is a 64-bit value containing:
      micro seconds since 1601-01-01 00:00:00

    Args:
      webkit_time: The 64-bit WebKit time timestamp.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    if webkit_time < (cls.TIMESTAMP_MIN_MICRO_SECONDS +
                      cls.WEBKIT_TIME_TO_POSIX_BASE):
      return 0
    return webkit_time - cls.WEBKIT_TIME_TO_POSIX_BASE

  @classmethod
  def GetNow(cls):
    """Retrieves the current time (now) as a timestamp in UTC."""
    time_elements = time.gmtime()
    return calendar.timegm(time_elements) * cls.MICRO_SECONDS_PER_SECOND

  @classmethod
  def IsLeapYear(cls, year):
    """Determines if a year is a leap year.

    A leap year is divisible by 4 and not by 100, or is divisible by 400,
    without a remainder.

    Args:
      year: The year as in 1970.

    Returns:
      A boolean value indicating the year is a leap year.
    """
    return (year % 4 == 0 and year % 100 != 0) or year % 400 == 0

  @classmethod
  def LocaltimeToUTC(cls, timestamp, timezone, is_dst=False):
    """Converts the timestamp in localtime of the timezone to UTC.

    Args:
      timestamp: An integer containing the timestamp.
      timezone: The timezone (pytz.timezone) object.
      is_dst: A boolean to indicate the timestamp is corrected for daylight
              savings time (DST) only used for the DST transition period.
              The default is false.

    Returns:
      An integer containing the timestamp or 0 on error.
    """
    # Check if timezone is UTC since utcoffset() does not support is_dst
    # for UTC and will raise.
    if timezone and timezone != pytz.utc:
      datetime_object = (
          datetime.datetime(1970, 1, 1, 0, 0, 0, 0, tzinfo=None) +
          datetime.timedelta(microseconds=timestamp))

      datetime_delta = timezone.utcoffset(datetime_object, is_dst=is_dst)
      seconds_delta = int(datetime_delta.total_seconds())
      timestamp -= seconds_delta * cls.MICRO_SECONDS_PER_SECOND

    return timestamp

  @classmethod
  def RoundToSeconds(cls, timestamp):
    """Takes a timestamp value and rounds it to a second precision.

    A fraction of half a second or more is rounded up to the next second.

    Args:
      timestamp: An integer containing the timestamp.

    Returns:
      An integer containing the timestamp rounded to a whole second.
    """
    # The modulo is never negative hence comparing it against half a second
    # rounds half up without relying on floating point round(), for which
    # the handling of ties differs between Python versions.
    leftovers = timestamp % cls.MICRO_SECONDS_PER_SECOND
    scrubbed = timestamp - leftovers

    if leftovers * 2 >= cls.MICRO_SECONDS_PER_SECOND:
      scrubbed += cls.MICRO_SECONDS_PER_SECOND

    return int(scrubbed)
def StringToDatetime(
    time_string, timezone=pytz.utc, dayfirst=False, gmt_as_timezone=True):
  """Converts a string representation of a timestamp into a datetime object.

  Args:
    time_string: String that contains a date and time value.
    timezone: Optional timezone object (instance of pytz.timezone) that
              the data and time value in the string represents. This value
              is used when the timezone cannot be determined from the string.
    dayfirst: An optional boolean argument. If set to true then the
              parser will change the precedence in which it parses timestamps
              from MM-DD-YYYY to DD-MM-YYYY (and YYYY-MM-DD will be YYYY-DD-MM,
              etc).
    gmt_as_timezone: Sometimes the dateutil parser will interpret GMT and UTC
                     the same way, that is not make a distinction. By default
                     this is set to true, that is GMT can be interpreted
                     differently than UTC. If that is not the expected result
                     this attribute can be set to false.

  Returns:
    A datetime object. If the string cannot be parsed the error is logged
    and a datetime object of 1970-01-01 00:00:00 UTC is returned.
  """
  # Replace a trailing "GMT" by "UTC" while preserving the space before it.
  if not gmt_as_timezone and time_string.endswith(' GMT'):
    time_string = u'{0:s}UTC'.format(time_string[:-3])

  try:
    datetime_object = dateutil.parser.parse(time_string, dayfirst=dayfirst)

  except (TypeError, ValueError) as exception:
    # Both values are formatted with !s since a value that is not a string,
    # such as the exception, does not support the "s" format specification
    # on every Python version, which would raise inside this error handler.
    logging.error(
        u'Unable to copy {0!s} to a datetime object with error: {1!s}'.format(
            time_string, exception))
    return datetime.datetime(1970, 1, 1, 0, 0, 0, 0, tzinfo=pytz.utc)

  # A date and time value that contains timezone information is converted
  # to UTC, otherwise it is considered to be in the timezone provided.
  if datetime_object.tzinfo:
    return datetime_object.astimezone(pytz.utc)

  return timezone.localize(datetime_object)
def GetCurrentYear():
  """Retrieves the year of the current date according to datetime.now().

  Returns:
    An integer containing the current year.
  """
  return datetime.datetime.now().year
+531
View File
@@ -0,0 +1,531 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a unit test for the timelib in Plaso."""
import calendar
import datetime
import unittest
from plaso.lib import timelib
import pytz
def CopyStringToTimestamp(time_string):
  """Copies a string containing a date and time value to a timestamp.

  Test function that does not rely on dateutil parser.

  Args:
    time_string: A string containing a date and time value formatted as:
                 YYYY-MM-DD hh:mm:ss.######[+-]##:##
                 Where # are numeric digits ranging from 0 to 9 and the seconds
                 fraction can be either 3 or 6 digits. Both the seconds fraction
                 and timezone offset are optional. The default timezone is UTC.

  Returns:
    An integer containing the timestamp.

  Raises:
    ValueError: if the time string is invalid or not supported.
  """
  def _ParseNumber(start, end, description, lower_bound=None, upper_bound=None):
    """Parses a part of the time string as an optionally bounded integer."""
    try:
      value = int(time_string[start:end], 10)
    except ValueError:
      raise ValueError(u'Unable to parse {0:s}.'.format(description))

    if lower_bound is not None and not lower_bound <= value <= upper_bound:
      raise ValueError(u'{0:s}{1:s} value out of bounds.'.format(
          description[0].upper(), description[1:]))
    return value

  time_string_length = len(time_string)

  # The time string should at least contain 'YYYY-MM-DD hh:mm:ss'.
  if (time_string_length < 19 or time_string[4] != '-' or
      time_string[7] != '-' or time_string[10] != ' ' or
      time_string[13] != ':' or time_string[16] != ':'):
    raise ValueError(u'Invalid time string.')

  year = _ParseNumber(0, 4, 'year')
  month = _ParseNumber(5, 7, 'month', 1, 12)
  day_of_month = _ParseNumber(8, 10, 'day of month', 1, 31)
  hours = _ParseNumber(11, 13, 'hours', 0, 23)
  minutes = _ParseNumber(14, 16, 'minutes', 0, 59)
  seconds = _ParseNumber(17, 19, 'seconds', 0, 59)

  micro_seconds = 0
  timezone_offset = 0

  if time_string_length > 19:
    timezone_index = 19

    if time_string[19] == '.':
      # The fraction of seconds runs up to the sign of the timezone offset
      # or, when no timezone offset was defined, the end of the string.
      timezone_index = 20
      while (timezone_index < time_string_length and
             time_string[timezone_index] not in ('+', '-')):
        timezone_index += 1

      fraction_of_seconds_length = timezone_index - 20
      if fraction_of_seconds_length not in (3, 6):
        raise ValueError(u'Invalid time string.')

      micro_seconds = _ParseNumber(20, timezone_index, 'fraction of seconds')

      # A fraction of 3 digits contains milli seconds.
      if fraction_of_seconds_length == 3:
        micro_seconds *= 1000

    if timezone_index < time_string_length:
      # The timezone offset must be formatted as: [+-]##:##
      if (time_string_length - timezone_index != 6 or
          time_string[timezone_index] not in ('+', '-') or
          time_string[timezone_index + 3] != ':'):
        raise ValueError(u'Invalid time string.')

      timezone_hours = _ParseNumber(
          timezone_index + 1, timezone_index + 3, 'timezone hours offset',
          0, 23)
      timezone_minutes = _ParseNumber(
          timezone_index + 4, timezone_index + 6, 'timezone minutes offset',
          0, 59)

      timezone_offset = ((timezone_hours * 60) + timezone_minutes) * 60

      # Note that the offset, including its minutes, needs to be subtracted
      # for a timezone that is ahead of UTC (positive sign) and added for
      # a timezone that is behind UTC (negative sign).
      if time_string[timezone_index] == '+':
        timezone_offset = -timezone_offset

  timestamp = int(calendar.timegm((
      year, month, day_of_month, hours, minutes, seconds)))

  return ((timestamp + timezone_offset) * 1000000) + micro_seconds
class TimeLibUnitTest(unittest.TestCase):
"""A unit test for the timelib."""
def testCocoaTime(self):
  """Checks FromCocoaTime against known Cocoa time and date string pairs."""
  test_cases = [
      (395011845, '2013-07-08 21:30:45'),
      (395353142, '2013-07-12 20:19:02'),
      (394993669, '2013-07-08 16:27:49')]

  for cocoa_time, expected_time_string in test_cases:
    timestamp = timelib.Timestamp.FromCocoaTime(cocoa_time)
    expected_timestamp = CopyStringToTimestamp(expected_time_string)
    self.assertEquals(timestamp, expected_timestamp)
def testHFSTimes(self):
  """Checks the FromHfsTime and FromHfsPlusTime conversions."""
  # HFS time in the EST5EDT timezone with the DST flag set.
  local_timezone = pytz.timezone('EST5EDT')
  timestamp = timelib.Timestamp.FromHfsTime(
      3458215528, timezone=local_timezone, is_dst=True)
  expected_timestamp = CopyStringToTimestamp('2013-08-01 15:25:28-04:00')
  self.assertEquals(timestamp, expected_timestamp)

  # HFS+ time and the date and time string it should convert to.
  hfs_plus_test_cases = [
      (3458215528, '2013-08-01 15:25:28'),
      (3413373928, '2012-02-29 15:25:28')]

  for hfs_plus_time, expected_time_string in hfs_plus_test_cases:
    timestamp = timelib.Timestamp.FromHfsPlusTime(hfs_plus_time)
    expected_timestamp = CopyStringToTimestamp(expected_time_string)
    self.assertEquals(timestamp, expected_timestamp)
def testTimestampIsLeapYear(self):
  """Checks IsLeapYear for regular years and century years."""
  test_cases = [(2012, True), (2013, False), (2000, True), (1900, False)]

  for year, expected_result in test_cases:
    result = timelib.Timestamp.IsLeapYear(year)
    self.assertEquals(result, expected_result)
def testTimestampDaysInMonth(self):
  """Checks DaysInMonth for every month and for invalid month values."""
  # Tuples of: 0-based month, year, expected number of days.
  test_cases = [
      (0, 2013, 31),
      (1, 2013, 28),
      (1, 2012, 29),
      (2, 2013, 31),
      (3, 2013, 30),
      (4, 2013, 31),
      (5, 2013, 30),
      (6, 2013, 31),
      (7, 2013, 31),
      (8, 2013, 30),
      (9, 2013, 31),
      (10, 2013, 30),
      (11, 2013, 31)]

  for month, year, expected_number_of_days in test_cases:
    number_of_days = timelib.Timestamp.DaysInMonth(month, year)
    self.assertEquals(number_of_days, expected_number_of_days)

  # Month values just outside of the supported range.
  for invalid_month in (-1, 12):
    with self.assertRaises(ValueError):
      timelib.Timestamp.DaysInMonth(invalid_month, 2013)
def testTimestampDaysInYear(self):
  """Test the days in year function."""
  # A regular year followed by a leap year.
  self.assertEqual(timelib.Timestamp.DaysInYear(2013), 365)
  self.assertEqual(timelib.Timestamp.DaysInYear(2012), 366)
def testTimestampDayOfYear(self):
  """Test the day of year function."""
  # NOTE(review): the arguments appear to be a zero-based day, a zero-based
  # month and the year, since (0, 0, 2013) maps to day 0 - confirm against
  # timelib.Timestamp.DayOfYear.
  self.assertEqual(timelib.Timestamp.DayOfYear(0, 0, 2013), 0)
  self.assertEqual(timelib.Timestamp.DayOfYear(0, 2, 2013), 31 + 28)

  # The leap day shifts every later day of the year by one.
  self.assertEqual(timelib.Timestamp.DayOfYear(0, 2, 2012), 31 + 29)
  self.assertEqual(
      timelib.Timestamp.DayOfYear(0, 11, 2013),
      31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30)
def testTimestampFromDelphiTime(self):
  """Test the Delphi date time conversion."""
  # The Delphi value is a floating point number.
  self.assertEqual(
      timelib.Timestamp.FromDelphiTime(41443.8263953),
      CopyStringToTimestamp('2013-06-18 19:50:00'))
def testTimestampFromFatDateTime(self):
  """Test the FAT date time conversion."""
  self.assertEqual(
      timelib.Timestamp.FromFatDateTime(0xa8d03d0c),
      CopyStringToTimestamp('2010-08-12 21:06:32'))

  # Each case below clears one bit field of the valid value and replaces it
  # with an out of range value; the conversion is expected to return 0.

  # Invalid number of seconds.
  fat_date_time = (0xa8d03d0c & ~(0x1f << 16)) | ((30 & 0x1f) << 16)
  self.assertEqual(timelib.Timestamp.FromFatDateTime(fat_date_time), 0)

  # Invalid number of minutes.
  fat_date_time = (0xa8d03d0c & ~(0x3f << 21)) | ((60 & 0x3f) << 21)
  self.assertEqual(timelib.Timestamp.FromFatDateTime(fat_date_time), 0)

  # Invalid number of hours.
  fat_date_time = (0xa8d03d0c & ~(0x1f << 27)) | ((24 & 0x1f) << 27)
  self.assertEqual(timelib.Timestamp.FromFatDateTime(fat_date_time), 0)

  # Invalid day of month.
  fat_date_time = (0xa8d03d0c & ~0x1f) | (32 & 0x1f)
  self.assertEqual(timelib.Timestamp.FromFatDateTime(fat_date_time), 0)

  # Invalid month.
  fat_date_time = (0xa8d03d0c & ~(0x0f << 5)) | ((13 & 0x0f) << 5)
  self.assertEqual(timelib.Timestamp.FromFatDateTime(fat_date_time), 0)
def testTimestampFromWebKitTime(self):
  """Test the WebKit time conversion."""
  self.assertEqual(
      timelib.Timestamp.FromWebKitTime(0x2dec3d061a9bfb),
      CopyStringToTimestamp('2010-08-12 21:06:31.546875'))

  # 86400 seconds expressed in microseconds is expected to map to the
  # second day of the year 1601.
  webkit_time = 86400 * 1000000
  self.assertEqual(
      timelib.Timestamp.FromWebKitTime(webkit_time),
      CopyStringToTimestamp('1601-01-02 00:00:00'))

  # WebKit time that exceeds lower bound. A plain integer literal is used
  # for the shift since Python promotes to a long integer when needed.
  webkit_time = -((1 << 63) - 1)
  self.assertEqual(timelib.Timestamp.FromWebKitTime(webkit_time), 0)
def testTimestampFromFiletime(self):
  """Test the FILETIME conversion."""
  self.assertEqual(
      timelib.Timestamp.FromFiletime(0x01cb3a623d0a17ce),
      CopyStringToTimestamp('2010-08-12 21:06:31.546875'))

  # 86400 seconds expressed in 100 nanosecond intervals is expected to map
  # to the second day of the year 1601.
  filetime = 86400 * 10000000
  self.assertEqual(
      timelib.Timestamp.FromFiletime(filetime),
      CopyStringToTimestamp('1601-01-02 00:00:00'))

  # FILETIME that exceeds lower bound.
  filetime = -1
  self.assertEqual(timelib.Timestamp.FromFiletime(filetime), 0)
def testTimestampFromPosixTime(self):
  """Test the POSIX time conversion."""
  self.assertEqual(
      timelib.Timestamp.FromPosixTime(1281647191),
      CopyStringToTimestamp('2010-08-12 21:06:31'))

  # A negative POSIX time, hence a date before 1970.
  self.assertEqual(
      timelib.Timestamp.FromPosixTime(-122557518),
      timelib.Timestamp.FromTimeString('1966-02-12 1966 12:14:42 UTC'))

  # POSIX time that exceeds upper bound.
  self.assertEqual(timelib.Timestamp.FromPosixTime(9223372036855), 0)

  # POSIX time that exceeds lower bound.
  self.assertEqual(timelib.Timestamp.FromPosixTime(-9223372036855), 0)
def testMonthDict(self):
  """Test the month dict, both inside and outside of scope."""
  # The dictionary maps lower case month abbreviations to 1-based month
  # numbers.
  self.assertEqual(timelib.MONTH_DICT['nov'], 11)
  self.assertEqual(timelib.MONTH_DICT['jan'], 1)
  self.assertEqual(timelib.MONTH_DICT['may'], 5)

  # An unknown abbreviation is not present in the dictionary.
  month = timelib.MONTH_DICT.get('doesnotexist')
  self.assertIsNone(month)
def testLocaltimeToUTC(self):
  """Test the localtime to UTC conversion."""
  timezone = pytz.timezone('CET')

  # Outside of daylight savings CET is one hour ahead of UTC.
  local_timestamp = CopyStringToTimestamp('2013-01-01 01:00:00')
  self.assertEqual(
      timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone),
      CopyStringToTimestamp('2013-01-01 00:00:00'))

  # During daylight savings the difference is two hours.
  local_timestamp = CopyStringToTimestamp('2013-07-01 02:00:00')
  self.assertEqual(
      timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone),
      CopyStringToTimestamp('2013-07-01 00:00:00'))

  # In the local timezone this is a non-existent timestamp.
  local_timestamp = CopyStringToTimestamp('2013-03-31 02:00:00')
  with self.assertRaises(pytz.NonExistentTimeError):
    timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone, is_dst=None)

  self.assertEqual(
      timelib.Timestamp.LocaltimeToUTC(
          local_timestamp, timezone, is_dst=True),
      CopyStringToTimestamp('2013-03-31 00:00:00'))

  self.assertEqual(
      timelib.Timestamp.LocaltimeToUTC(
          local_timestamp, timezone, is_dst=False),
      CopyStringToTimestamp('2013-03-31 01:00:00'))

  # In the local timezone this is an ambiguous timestamp.
  local_timestamp = CopyStringToTimestamp('2013-10-27 02:30:00')
  with self.assertRaises(pytz.AmbiguousTimeError):
    timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone, is_dst=None)

  self.assertEqual(
      timelib.Timestamp.LocaltimeToUTC(
          local_timestamp, timezone, is_dst=True),
      CopyStringToTimestamp('2013-10-27 00:30:00'))

  # Without an explicit is_dst the non-DST interpretation is expected.
  self.assertEqual(
      timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone),
      CopyStringToTimestamp('2013-10-27 01:30:00'))

  # Use the UTC timezone.
  self.assertEqual(
      timelib.Timestamp.LocaltimeToUTC(local_timestamp, pytz.utc),
      local_timestamp)

  # Use a timezone in the Western Hemisphere.
  timezone = pytz.timezone('EST')
  local_timestamp = CopyStringToTimestamp('2013-01-01 00:00:00')
  self.assertEqual(
      timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone),
      CopyStringToTimestamp('2013-01-01 05:00:00'))
def testCopyToDatetime(self):
  """Test the copy to datetime object."""
  timezone = pytz.timezone('CET')
  timestamp = CopyStringToTimestamp('2013-03-14 20:20:08.850041')

  # The expected datetime is one hour later than the timestamp string and
  # carries the CET timezone.
  self.assertEqual(
      timelib.Timestamp.CopyToDatetime(timestamp, timezone),
      datetime.datetime(2013, 3, 14, 21, 20, 8, 850041, tzinfo=timezone))
def testCopyToPosix(self):
  """Test converting microseconds to seconds."""
  timestamp = CopyStringToTimestamp('2013-10-01 12:00:00')

  # The POSIX value is the timestamp with the microseconds dropped.
  self.assertEqual(
      timelib.Timestamp.CopyToPosix(timestamp),
      timestamp // 1000000)
def testTimestampFromTimeString(self):
  """Tests the FromTimeString function."""
  # Test daylight savings.
  expected_timestamp = CopyStringToTimestamp('2013-10-01 12:00:00')

  # Check certain variance of this timestamp: the same moment expressed as
  # local time in different timezones.
  for time_string, timezone_name in [
      ('2013-10-01 14:00:00', 'Europe/Rome'),
      ('2013-10-01 12:00:00', 'UTC'),
      ('2013-10-01 05:00:00', 'PST8PDT')]:
    timestamp = timelib.Timestamp.FromTimeString(
        time_string, pytz.timezone(timezone_name))
    self.assertEqual(timestamp, expected_timestamp)

  # Now to test outside of the daylight savings.
  expected_timestamp = CopyStringToTimestamp('2014-02-01 12:00:00')

  for time_string, timezone_name in [
      ('2014-02-01 13:00:00', 'Europe/Rome'),
      ('2014-02-01 12:00:00', 'UTC'),
      ('2014-02-01 04:00:00', 'PST8PDT')]:
    timestamp = timelib.Timestamp.FromTimeString(
        time_string, pytz.timezone(timezone_name))
    self.assertEqual(timestamp, expected_timestamp)

  # Define two timestamps, one being GMT and the other UTC.
  time_string_utc = 'Wed 05 May 2010 03:52:31 UTC'
  time_string_gmt = 'Wed 05 May 2010 03:52:31 GMT'

  timestamp_utc = timelib.Timestamp.FromTimeString(time_string_utc)
  timestamp_gmt = timelib.Timestamp.FromTimeString(time_string_gmt)

  # Test if these two are different, and if so, then we'll try again
  # with gmt_as_timezone set to False, which then should yield the same
  # results.
  if timestamp_utc != timestamp_gmt:
    self.assertEqual(timestamp_utc, timelib.Timestamp.FromTimeString(
        time_string_gmt, gmt_as_timezone=False))
def testRoundTimestamp(self):
  """Test the RoundToSeconds function."""
  # Should be rounded up, the microseconds part is 785412.
  test_one = 442813351785412
  # Should be rounded down, the microseconds part is 271976.
  test_two = 1384381247271976

  self.assertEqual(
      timelib.Timestamp.RoundToSeconds(test_one), 442813352000000)
  self.assertEqual(
      timelib.Timestamp.RoundToSeconds(test_two), 1384381247000000)
def testTimestampFromTimeParts(self):
  """Test the FromTimeParts function."""
  # Time parts in a timezone that is 7 hours behind UTC at that date.
  timestamp = timelib.Timestamp.FromTimeParts(
      2013, 6, 25, 22, 19, 46, 0, timezone=pytz.timezone('PST8PDT'))
  self.assertEqual(
      timestamp, CopyStringToTimestamp('2013-06-25 22:19:46-07:00'))

  # Without a timezone and microseconds argument.
  timestamp = timelib.Timestamp.FromTimeParts(2013, 6, 26, 5, 19, 46)
  self.assertEqual(
      timestamp, CopyStringToTimestamp('2013-06-26 05:19:46'))

  # The seventh argument is the number of microseconds.
  timestamp = timelib.Timestamp.FromTimeParts(
      2013, 6, 26, 5, 19, 46, 542)
  self.assertEqual(
      timestamp, CopyStringToTimestamp('2013-06-26 05:19:46.000542'))
def _TestStringToDatetime(
    self, expected_timestamp, time_string, timezone=pytz.utc, dayfirst=False):
  """Tests the StringToDatetime function.

  The time string is converted with timelib.StringToDatetime and the result,
  expressed as a number of seconds in UTC, is compared to the expected
  timestamp.

  Args:
    expected_timestamp: The expected timestamp, an integer containing the
                        number of seconds since January 1, 1970 00:00:00 UTC.
    time_string: String that contains a date and time value.
    timezone: The timezone (pytz.timezone) object.
    dayfirst: Change precedence of day vs. month.
  """
  date_time = timelib.StringToDatetime(
      time_string, timezone=timezone, dayfirst=dayfirst)

  # calendar.timegm interprets the time tuple as UTC.
  timestamp = int(calendar.timegm(date_time.utctimetuple()))
  self.assertEqual(timestamp, expected_timestamp)
def testStringToDatetime(self):
  """Exercises StringToDatetime with various formats and timezones."""
  est5edt = pytz.timezone('EST5EDT')

  self._TestStringToDatetime(
      471953580, '12-15-1984 05:13:00', timezone=est5edt)

  # Swap day and month.
  self._TestStringToDatetime(
      466420380, '12-10-1984 05:13:00', timezone=est5edt, dayfirst=True)

  self._TestStringToDatetime(471953580, '12-15-1984 10:13:00Z')

  # Setting the timezone for string that already contains a timezone
  # indicator should not affect the conversion.
  self._TestStringToDatetime(
      471953580, '12-15-1984 10:13:00Z', timezone=est5edt)

  # Day-first notations that cannot be mistaken for month-first ones.
  for day_first_string in ('15/12/1984 10:13:00Z', '15-12-84 10:13:00Z'):
    self._TestStringToDatetime(471953580, day_first_string)

  self._TestStringToDatetime(
      471967980, '15-12-84 10:13:00-04', timezone=est5edt)

  # A string that is not a date and time is expected to yield 0.
  self._TestStringToDatetime(0, 'thisisnotadatetime', timezone=est5edt)

  chicago = pytz.timezone('America/Chicago')
  self._TestStringToDatetime(
      471953580, '12-15-1984 04:13:00', timezone=chicago)
  self._TestStringToDatetime(
      458712780, '07-14-1984 23:13:00', timezone=chicago)

  self._TestStringToDatetime(
      471964380, '12-15-1984 05:13:00', timezone=pytz.timezone('US/Pacific'))
# Run the unit tests when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()
+199
View File
@@ -0,0 +1,199 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains utility functions."""
import logging
from plaso.lib import errors
from plaso.lib import lexer
# Names that are reserved.
# NOTE(review): presumably these are attribute names that have a fixed
# meaning elsewhere in plaso - confirm against the users of this constant.
RESERVED_VARIABLES = frozenset([
    'username',
    'inode',
    'hostname',
    'body',
    'parser',
    'regvalue',
    'timestamp',
    'timestamp_desc',
    'source_short',
    'source_long',
    'timezone',
    'filename',
    'display_name',
    'pathspec',
    'offset',
    'store_number',
    'store_index',
    'tag',
    'data_type',
    'metadata',
    'http_headers',
    'query',
    'mapped_files',
    'uuid'])
def IsText(bytes_in, encoding=None):
  """Examines a byte sequence and determines if it is indicative of text.

  Parsers need a quick and at least semi-reliable method of discovering
  whether or not a particular byte stream is text or resembles text. This can
  be used in text parsers to determine if a file is a text file or not, for
  instance.

  The method assumes the byte sequence is either printable ASCII, UTF-8 or
  encoded in the caller supplied character encoding. Otherwise it makes the
  assumption that the byte sequence is not text. UTF-16 is currently not
  tried, see the TODO in the method body.

  Args:
    bytes_in: The byte sequence passed to the method that needs examination.
    encoding: Optional encoding to test, if not defined only ASCII and UTF-8
              are tried.

  Returns:
    Boolean value indicating whether or not the byte sequence is text.
  """
  # TODO: Improve speed and accuracy of this method.
  try:
    text_type = unicode
  except NameError:
    # Python 3 has no unicode type, str is the text type.
    text_type = str

  # A Unicode string, or an instance of a subclass, is already text.
  if isinstance(bytes_in, text_type):
    return True

  # Check if this is a printable ASCII string. Iterating over a byte string
  # yields 1-character strings on Python 2 and integers on Python 3.
  if all(
      31 < (char if isinstance(char, int) else ord(char)) < 128
      for char in bytes_in):
    return True

  # Check if this is UTF-8.
  try:
    bytes_in.decode('utf-8')
    return True
  except UnicodeDecodeError:
    pass

  # TODO: UTF-16 decode is successful in too many edge cases where we are
  # not really dealing with text at all. Leaving this out for now, consider
  # re-enabling or making a better determination.

  if encoding:
    try:
      bytes_in.decode(encoding)
      return True
    except UnicodeDecodeError:
      pass
    except LookupError:
      # The encoding name is unknown to Python, which is reported but does
      # not make the byte sequence text.
      logging.error(
          u'String encoding not recognized: {0:s}'.format(encoding))

  return False
def GetBaseName(path):
  """Returns the basename of a path that is Windows or *NIX separated.

  Args:
    path: The path string, which can contain forward slashes, backslashes or
          a mix of both.

  Returns:
    The part of the path after the last separator, or the path itself if it
    does not contain a separator.
  """
  forward_count = path.count('/')
  backward_count = path.count('\\')

  # If both types of slashes are present guess that the most frequent one is
  # the path separator. The backslash is used when the counts are equal,
  # which includes a path without any separator.
  if forward_count > backward_count:
    separator = '/'
  else:
    separator = '\\'

  _, _, base = path.rpartition(separator)
  return base
def GetUnicodeString(string):
  """Converts the string to Unicode if necessary.

  Args:
    string: The value to convert, which can be a Unicode string, a byte
            string or another object that has a string representation.

  Returns:
    A Unicode string. A Unicode string is returned as-is, any other value is
    converted to a string and decoded as UTF-8 where bytes that cannot be
    decoded are ignored.
  """
  try:
    text_type = unicode
  except NameError:
    # Python 3 has no unicode type, str is the text type.
    text_type = str

  if isinstance(string, text_type):
    return string

  if not isinstance(string, bytes):
    string = str(string)

  # On Python 2 str() yields a byte string that still needs to be decoded,
  # on Python 3 only an actual bytes value is decoded here.
  if isinstance(string, bytes):
    string = string.decode('utf8', 'ignore')

  return string
class PathReplacer(lexer.Lexer):
  """Lexer that replaces path variables with preprocessing values.

  A {name} expression is replaced by the attribute with that name of the
  preprocessing object, a {{name}} expression is turned into the literal
  text {name} and all other characters are copied unchanged.
  """

  # NOTE(review): the Token arguments are presumably the state expression,
  # the match expression, the name of the callback method and the next
  # state - confirm against lexer.Token.
  tokens = [
      lexer.Token('.', '{{([^}]+)}}', 'ReplaceVariable', ''),
      lexer.Token('.', '{([^}]+)}', 'ReplaceString', ''),
      lexer.Token('.', '([^{])', 'ParseString', ''),
  ]

  def __init__(self, pre_obj, data=''):
    """Initializes the path replacer.

    Args:
      pre_obj: The preprocessing object whose attributes provide the
               replacement values.
      data: Optional path string to process, passed to the lexer.
    """
    super(PathReplacer, self).__init__(data)
    self._pre_obj = pre_obj
    self._path = []

  def GetPath(self):
    """Runs the lexer over the data and returns the resulting path.

    Returns:
      A Unicode string of the path segments collected by the callbacks.
    """
    # Always read at least one token and stop once the lexer is empty.
    self.NextToken()
    while not self.Empty():
      self.NextToken()

    return u''.join(self._path)

  def ParseString(self, match, **_):
    """Copies the matched character to the path unchanged."""
    segment = match.group(1)
    self._path.append(segment)

  def ReplaceVariable(self, match, **_):
    """Appends the matched name wrapped in a single pair of braces."""
    literal = u'{{{0:s}}}'.format(match.group(1))
    self._path.append(literal)

  def ReplaceString(self, match, **_):
    """Appends the preprocessing attribute named by the match.

    Raises:
      PathNotFound: if the attribute is missing or has a false value.
    """
    attribute_name = match.group(1)
    attribute_value = getattr(self._pre_obj, attribute_name, None)
    if not attribute_value:
      raise errors.PathNotFound(
          u'Path variable: {} not discovered yet.'.format(attribute_name))

    self._path.append(attribute_value)
def GetInodeValue(inode_raw):
  """Reads in a 'raw' inode value and tries to convert it into an integer.

  Args:
    inode_raw: A string or a number containing the inode value. For a
               string that cannot be converted as a whole, only the part
               before the first dash is used.

  Returns:
    An integer inode value or -1 if the value cannot be converted.
  """
  # int() handles integers, floating point values and numeric strings.
  try:
    return int(inode_raw)
  except (OverflowError, TypeError, ValueError):
    pass

  # Let's do one more attempt, using only the part before the first dash.
  # The string conversion is inside the try block since it can raise a
  # UnicodeEncodeError, which is a ValueError, on Python 2.
  try:
    inode_string, _, _ = str(inode_raw).partition('-')
    return int(inode_string)
  except ValueError:
    return -1
+48
View File
@@ -0,0 +1,48 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains the unit tests for the utils library of methods."""
import unittest
from plaso.lib import utils
class UtilsTestCase(unittest.TestCase):
  """Unit tests for the functions of the utils library."""

  def testIsText(self):
    """Checks IsText with text-like and binary byte sequences."""
    # Pairs of the byte sequence to examine and whether or not it is
    # expected to be considered text.
    test_cases = [
        ('this is My Weird ASCII and non whatever string.', True),
        (u'Plaso Síar Og Raðar Þessu', True),
        ('\x01\62LSO\xFF', False),
        ('T\x00h\x00i\x00s\x00\x20\x00', True),
        ('Ascii\x00', True),
        ('Ascii Start then...\x00\x99\x23', False)]

    for bytes_in, expected_is_text in test_cases:
      if expected_is_text:
        self.assertTrue(utils.IsText(bytes_in))
      else:
        self.assertFalse(utils.IsText(bytes_in))
# Run the unit tests when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()