plaso-rubanetra/plaso/output/mysql_4n6.py
2020-04-06 18:48:34 +02:00

403 lines
14 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import re
import sys
import MySQLdb
from plaso import formatters
from plaso.formatters import interface as formatters_interface
from plaso.formatters import manager as formatters_manager
from plaso.lib import errors
from plaso.lib import output
from plaso.lib import timelib
from plaso.lib import utils
from plaso.output import helper
# Author attribution for this output module.
__author__ = 'David Nides (david.nides@gmail.com)'
class Mysql4n6(output.LogOutputFormatter):
    """Output module that writes events into a 4n6time MySQL database.

    Start() connects to the server and creates the tables, EventBody()
    inserts one row per event into the log2timeline table and End()
    creates the indices, fills the metadata tables, commits and closes
    the connection.
    """

    # Extracts the attribute names referenced in a formatter format string.
    FORMAT_ATTRIBUTE_RE = re.compile('{([^}]+)}')

    # Columns of the log2timeline table for which the distinct values and
    # their frequency are stored in a companion l2t_<field>s table.
    META_FIELDS = [
        'sourcetype', 'source', 'user', 'host', 'MACB', 'color', 'type',
        'record_number']

    # Command line arguments of the output module as (name, options) tuples.
    ARGUMENTS = [
        ('--db_user', {
            'dest': 'db_user',
            'type': unicode,
            'help': 'Defines the database user.',
            'metavar': 'USERNAME',
            'action': 'store',
            'default': 'root'}),
        ('--db_host', {
            'dest': 'db_host',
            'metavar': 'HOSTNAME',
            'type': unicode,
            'help': (
                'Defines the IP address or the hostname of the database '
                'server.'),
            'action': 'store',
            'default': 'localhost'}),
        ('--db_pass', {
            'dest': 'db_pass',
            'metavar': 'PASSWORD',
            'type': unicode,
            'help': 'The password for the database user.',
            'action': 'store',
            'default': 'forensic'}),
        ('--db_name', {
            'dest': 'db_name',
            'type': unicode,
            'help': 'The name of the database to connect to.',
            'action': 'store',
            'default': 'log2timeline'}),
        ('--append', {
            'dest': 'append',
            'action': 'store_true',
            'help': (
                'Defines whether the intention is to append to an already '
                'existing database or overwrite it. Defaults to overwrite.'),
            'default': False}),
        ('--fields', {
            'dest': 'fields',
            'action': 'store',
            'type': unicode,
            'nargs': '*',
            'help': 'Defines which fields should be indexed in the database.',
            'default': [
                'host', 'user', 'source', 'sourcetype', 'type', 'datetime',
                'color']}),
        ('--evidence', {
            'dest': 'evidence',
            'action': 'store',
            'help': (
                'Set the evidence field to a specific value, defaults to '
                'empty.'),
            'type': unicode,
            'default': '-'})]

    # Statement used by EventBody() to insert a single event; the values
    # are always passed separately as query parameters.
    _INSERT_EVENT_QUERY = (
        'INSERT INTO log2timeline(timezone, MACB, source, '
        'sourcetype, type, user, host, description, filename, '
        'inode, notes, format, extra, datetime, reportnotes, '
        'inreport, tag, color, offset, store_number, '
        'store_index, vss_store_number, URL, record_number, '
        'event_identifier, event_type, source_name, user_sid, '
        'computer_name, evidence) VALUES ('
        '%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '
        '%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '
        '%s, %s, %s, %s)')

    # Number of inserted events after which the transaction is committed.
    _COMMIT_INTERVAL = 10000

    def __init__(self, store, filehandle=sys.stdout, config=None,
                 filter_use=None):
        """Initializes the output module from the configuration object.

        Every setting is read from config with getattr(), so a missing
        attribute (or config being None) falls back to the default value.

        Args:
          store: The storage object.
          filehandle: A file-like object that can be written to.
          config: The configuration object for the module.
          filter_use: The filter object used.
        """
        # TODO: Add a unit test for this output module.
        super(Mysql4n6, self).__init__(store, filehandle, config, filter_use)
        # TODO: move this to an output module interface.
        self._formatters_manager = formatters_manager.EventFormatterManager

        # Optional callback that receives progress messages.
        self.set_status = getattr(config, 'set_status', None)

        self.host = getattr(config, 'db_host', 'localhost')
        self.user = getattr(config, 'db_user', 'root')
        self.password = getattr(config, 'db_pass', 'forensic')
        self.dbname = getattr(config, 'db_name', 'log2timeline')
        self.evidence = getattr(config, 'evidence', '-')
        self.append = getattr(config, 'append', False)
        self.fields = getattr(config, 'fields', [
            'host', 'user', 'source', 'sourcetype', 'type', 'datetime',
            'color'])

    def _SetStatus(self, message):
        """Reports progress through the status callback, when one is set.

        Args:
          message: The status message (string).
        """
        if self.set_status:
            self.set_status(message)

    def Start(self):
        """Connects to the database and creates the tables before inserting.

        Raises:
          IOError: if no database name is set or if MySQLdb reports an
              error while connecting or while creating the tables.
        """
        # Also rejects None, which could not be formatted into the SQL below.
        if not self.dbname:
            raise IOError(u'Specify a database name.')

        try:
            if self.append:
                # When appending the database is selected while connecting.
                self.conn = MySQLdb.connect(
                    self.host, self.user, self.password, self.dbname)
            else:
                self.conn = MySQLdb.connect(
                    self.host, self.user, self.password)
            self.curs = self.conn.cursor()

            self.conn.set_character_set(u'utf8')
            for statement in (
                    u'SET NAMES utf8',
                    u'SET CHARACTER SET utf8',
                    u'SET character_set_connection=utf8',
                    u'SET GLOBAL innodb_large_prefix=ON',
                    u'SET GLOBAL innodb_file_format=barracuda',
                    u'SET GLOBAL innodb_file_per_table=ON'):
                self.curs.execute(statement)

            # NOTE: the database name is an identifier and therefore cannot
            # be passed as a query parameter.
            self.curs.execute(
                u'CREATE DATABASE IF NOT EXISTS {0:s}'.format(self.dbname))
            self.curs.execute(u'USE {0:s}'.format(self.dbname))

            # Create tables.
            self.curs.execute((
                u'CREATE TABLE IF NOT EXISTS log2timeline ('
                u'rowid INT NOT NULL AUTO_INCREMENT, timezone VARCHAR(256), '
                u'MACB VARCHAR(256), source VARCHAR(256), '
                u'sourcetype VARCHAR(256), '
                u'type VARCHAR(256), user VARCHAR(256), host VARCHAR(256), '
                u'description TEXT, filename VARCHAR(256), '
                u'inode VARCHAR(256), '
                u'notes VARCHAR(256), format VARCHAR(256), '
                u'extra TEXT, datetime datetime, reportnotes VARCHAR(256), '
                u'inreport VARCHAR(256), tag VARCHAR(256), '
                u'color VARCHAR(256), '
                u'offset INT, store_number INT, store_index INT, '
                u'vss_store_number INT, URL TEXT, '
                u'record_number VARCHAR(256), event_identifier VARCHAR(256), '
                u'event_type VARCHAR(256), source_name VARCHAR(256), '
                u'user_sid VARCHAR(256), computer_name VARCHAR(256), '
                u'evidence VARCHAR(256), '
                u'PRIMARY KEY (rowid)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED'))
            self._SetStatus(u'Created table: log2timeline')

            # One value/frequency table per metadata field.
            for field in self.META_FIELDS:
                self.curs.execute(
                    u'CREATE TABLE IF NOT EXISTS l2t_{0}s '
                    u'({0}s TEXT, frequency INT) '
                    u'ENGINE=InnoDB ROW_FORMAT=COMPRESSED'.format(field))
                self._SetStatus(u'Created table: l2t_{0:s}'.format(field))

            self.curs.execute(
                u'CREATE TABLE IF NOT EXISTS l2t_tags (tag TEXT) '
                u'ENGINE=InnoDB ROW_FORMAT=COMPRESSED')
            self._SetStatus(u'Created table: l2t_tags')

            self.curs.execute(
                u'CREATE TABLE IF NOT EXISTS l2t_saved_query ('
                u'name TEXT, query TEXT) '
                u'ENGINE=InnoDB ROW_FORMAT=COMPRESSED')
            self._SetStatus(u'Created table: l2t_saved_query')

            self.curs.execute(
                u'CREATE TABLE IF NOT EXISTS l2t_disk ('
                u'disk_type INT, mount_path TEXT, '
                u'dd_path TEXT, dd_offset TEXT, '
                u'storage_file TEXT, export_path TEXT) '
                u'ENGINE=InnoDB ROW_FORMAT=COMPRESSED')
            self.curs.execute(
                u'INSERT INTO l2t_disk ('
                u'disk_type, mount_path, dd_path, '
                u'dd_offset, storage_file, '
                u'export_path) VALUES '
                u'(0, "", "", "", "", "")')
            self._SetStatus(u'Created table: l2t_disk')

        except MySQLdb.Error as exception:
            # '!s' converts the exception explicitly instead of relying on
            # the exception object accepting the 's' format specification.
            raise IOError(
                u'Unable to insert into database with error: {0!s}'.format(
                    exception))

        # Number of events handled by EventBody() so far.
        self.count = 0

    def End(self):
        """Creates the indices, stores the metadata and commits.

        The cursor and the connection are closed when this method returns,
        also when one of the statements raises.
        """
        try:
            # Build up indices for the fields specified in the args.
            # It will commit the inserts automatically before creating index.
            if not self.append:
                for field_name in self.fields:
                    # NOTE: the field name is an identifier and therefore
                    # cannot be passed as a query parameter.
                    self.curs.execute(
                        u'CREATE INDEX {0}_idx ON log2timeline ({0:s})'.format(
                            field_name))
                    self._SetStatus(
                        u'Created index: {0:s}'.format(field_name))

            # Get meta info and save into their tables.
            self._SetStatus(u'Creating metadata...')

            for field in self.META_FIELDS:
                frequencies = self._GetDistinctValues(field)
                self.curs.execute(u'DELETE FROM l2t_{0:s}s'.format(field))

                # Only the table and column names, which come from
                # META_FIELDS, are formatted into the statement. The values
                # were read back from the database and are passed as
                # parameters so that quotes in them cannot break the query.
                insert_query = (
                    u'INSERT INTO l2t_{0:s}s ({0:s}s, frequency) '
                    u'VALUES (%s, %s)').format(field)
                for name, frequency in frequencies.items():
                    self.curs.execute(insert_query, (name, frequency))

            self.curs.execute(u'DELETE FROM l2t_tags')
            for tag in self._ListTags():
                self.curs.execute(
                    u'INSERT INTO l2t_tags (tag) VALUES (%s)', (tag,))

            self._SetStatus(u'Database created.')
            self.conn.commit()
        finally:
            self.curs.close()
            self.conn.close()

    def _GetDistinctValues(self, field_name):
        """Queries the database for the distinct values of a column.

        Args:
          field_name: The name of a column of the log2timeline table.

        Returns:
          A dict mapping every value of the column, except NULL and the
          empty string, to the number of rows that have it.
        """
        self.curs.execute(
            u'SELECT {0}, COUNT({0}) FROM log2timeline GROUP BY {0}'.format(
                field_name))

        frequencies = {}
        for value, frequency in self.curs.fetchall():
            if value is None or value == '':
                continue
            frequencies[value] = int(frequency)
        return frequencies

    def _ListTags(self):
        """Queries the database for the distinct tags.

        The tag column holds a comma separated list of tags per row, so
        every value is split before the tags are de-duplicated.

        Returns:
          A list of the individual tags, in the order they were first seen.
        """
        self.curs.execute(u'SELECT DISTINCT tag FROM log2timeline')

        all_tags = []
        # Mirrors all_tags to keep the membership test cheap while the list
        # preserves the order.
        seen_tags = set()
        for tag_row in self.curs.fetchall():
            tag_string = tag_row[0]
            if not tag_string:
                continue
            for tag in tag_string.split(','):
                if tag not in seen_tags:
                    seen_tags.add(tag)
                    all_tags.append(tag)
        return all_tags

    def EventBody(self, event_object):
        """Formats an event as a 4n6time table row and writes it to the db.

        Events without a timestamp attribute are skipped, as are events
        whose timestamp cannot be converted; the latter are logged. A
        failing insert is logged as a warning and does not stop processing.

        Args:
          event_object: The event object (EventObject).

        Raises:
          errors.NoFormatterFound: If no formatter for this event is found.
        """
        if not hasattr(event_object, 'timestamp'):
            return

        event_formatter = self._formatters_manager.GetFormatter(event_object)
        if not event_formatter:
            raise errors.NoFormatterFound(
                u'Unable to output event, no event formatter found.')

        # Adjust the formatter so the '<|>' separator is placed between the
        # parts of the message.
        if (isinstance(
                event_formatter,
                formatters.winreg.WinRegistryGenericFormatter) and
                event_formatter.FORMAT_STRING.find('<|>') == -1):
            event_formatter.FORMAT_STRING = u'[{keyname}]<|>{text}<|>'
        elif isinstance(
                event_formatter,
                formatters_interface.ConditionalEventFormatter):
            event_formatter.FORMAT_STRING_SEPARATOR = u'<|>'
        elif isinstance(event_formatter, formatters_interface.EventFormatter):
            # Only rewrite the format string once: if the manager hands out
            # the same formatter instance for a later event (TODO confirm),
            # an unguarded replace would add another separator every call.
            if u'}<|>' not in event_formatter.format_string:
                event_formatter.format_string = (
                    event_formatter.format_string.replace('}', '}<|>'))

        msg, _ = event_formatter.GetMessages(event_object)
        source_short, source_long = event_formatter.GetSources(event_object)

        date_use = timelib.Timestamp.CopyToDatetime(
            event_object.timestamp, self.zone)
        if not date_use:
            logging.error(
                u'Unable to process date for entry: {0:s}'.format(msg))
            return

        # Attributes that are neither reserved nor part of the message are
        # collected in the extra column.
        format_variables = set(self.FORMAT_ATTRIBUTE_RE.findall(
            event_formatter.format_string))
        extra = u' '.join(
            u'{0:s}: {1!s} '.format(key, getattr(event_object, key, None))
            for key in event_object.GetAttributes()
            if key not in utils.RESERVED_VARIABLES and
            key not in format_variables)

        # Fall back to the inode of the path specification, when available.
        inode = getattr(event_object, 'inode', '-')
        if inode == '-':
            if (hasattr(event_object, 'pathspec') and
                    hasattr(event_object.pathspec, 'image_inode')):
                inode = event_object.pathspec.image_inode

        date_use_string = u'{0:d}-{1:d}-{2:d} {3:d}:{4:d}:{5:d}'.format(
            date_use.year, date_use.month, date_use.day, date_use.hour,
            date_use.minute, date_use.second)

        # An event without tags results in an empty tag column.
        tags = getattr(getattr(event_object, 'tag', None), 'tags', [])
        taglist = u','.join(tags)

        # The order of the values matches the column list of
        # _INSERT_EVENT_QUERY.
        row = (
            str(self.zone),
            helper.GetLegacy(event_object),
            source_short,
            source_long,
            getattr(event_object, 'timestamp_desc', '-'),
            getattr(event_object, 'username', '-'),
            getattr(event_object, 'hostname', '-'),
            msg,
            getattr(event_object, 'filename', '-'),
            inode,
            getattr(event_object, 'notes', '-'),
            getattr(event_object, 'parser', '-'),
            extra,
            date_use_string,
            '',
            '',
            taglist,
            '',
            getattr(event_object, 'offset', 0),
            event_object.store_number,
            event_object.store_index,
            self.GetVSSNumber(event_object),
            getattr(event_object, 'url', '-'),
            getattr(event_object, 'record_number', 0),
            getattr(event_object, 'event_identifier', '-'),
            getattr(event_object, 'event_type', '-'),
            getattr(event_object, 'source_name', '-'),
            getattr(event_object, 'user_sid', '-'),
            getattr(event_object, 'computer_name', '-'),
            self.evidence)

        try:
            self.curs.execute(self._INSERT_EVENT_QUERY, row)
        except MySQLdb.Error as exception:
            # '!s' converts the exception explicitly instead of relying on
            # the exception object accepting the 's' format specification.
            logging.warning(
                u'Unable to insert into database with error: {0!s}.'.format(
                    exception))

        # The counter tracks handled events, including failed inserts.
        self.count += 1

        # TODO: Experiment if committing the current transaction
        # every 10000 inserts is the optimal approach.
        if self.count % self._COMMIT_INTERVAL == 0:
            self.conn.commit()
            self._SetStatus(u'Inserting event: {0:d}'.format(self.count))

    def GetVSSNumber(self, event_object):
        """Returns the vss_store_number of the event.

        Args:
          event_object: The event object (EventObject).

        Returns:
          The vss_store_number attribute of the path specification of the
          event, or -1 if the event has no path specification or the path
          specification has no such attribute.
        """
        if not hasattr(event_object, 'pathspec'):
            return -1
        return getattr(event_object.pathspec, 'vss_store_number', -1)