Import from old repository

This commit is contained in:
Stefan
2020-04-06 18:48:34 +02:00
commit 0da6783a45
762 changed files with 103065 additions and 0 deletions
+8
View File
@@ -0,0 +1,8 @@
This folder contains a few extra nuggets: scripts that use the plaso
libraries, or other files that can be used with plaso. In other words, useful
tools that use the underlying infrastructure to perform actions that are not
part of the original design.
There is no formal setup file for any of the scripts here; they are mostly
provided as a PoC showing what can be done using the plaso libraries
to extend the tool.
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+242
View File
@@ -0,0 +1,242 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Extract search history from a plaso storage file and enjoy a cup of tea.
A very simple script that takes a plaso storage file as input
and then tries to extract common search engine history from it and spit
it out to your lovely little screen or a file of your choosing.
"""
import argparse
import locale
import logging
import os
import sys
import urllib
# pylint: disable=unused-import
from plaso import filters
from plaso import formatters
from plaso.lib import output
from plaso.lib import storage
# Filter expressions paired with the name of the FilterClass method that is
# called with the URL of every event matching that expression. Main() compiles
# each expression with filters.GetFilter() and resolves each name with
# getattr(FilterClass, name); the name also labels the engine in the output.
FILTERS = (
(('source is "WEBHIST" and url iregexp "(www.|encrypted.|/)google." and '
'url contains "search"'), 'GoogleSearch'),
('source is "WEBHIST" and url contains "youtube.com"', 'YouTube'),
(('source is "WEBHIST" and url contains "bing.com" and url contains '
'"search"'), 'BingSearch'),
('source is "WEBHIST" and url contains "mail.google.com"', 'Gmail'),
(('source is "WEBHIST" and url contains "yandex.com" and url contains '
'"yandsearch"'), 'Yandex'),
('source is "WEBHIST" and url contains "duckduckgo.com"', 'DuckDuckGo')
)
def ScrubLine(line):
  """Replaces URL percent-encoded (%XX) sequences in a line with text.

  The %XX escapes are decoded to bytes and the resulting byte string is
  interpreted as UTF-8. A '+' is left untouched.

  Args:
    line: The string that we are about to "fix".

  Returns:
    A Unicode string with the %XX escapes decoded. An empty string if line
    is empty or None. The line itself, unchanged, if it contains no '%' or
    if the decoded bytes are not valid UTF-8.
  """
  if not line:
    return ''

  if '%' not in line:
    return line

  # Calling str() on Unicode text with non-ASCII characters raises an
  # UnicodeEncodeError on Python 2, which nothing caught. Encode explicitly
  # so the unquoting always operates on UTF-8 bytes.
  if isinstance(line, bytes):
    encoded_line = line
  else:
    encoded_line = line.encode('utf-8')

  try:
    # Python 2: unquote() of a byte string returns a byte string.
    unquote_bytes = urllib.unquote
  except AttributeError:
    # Python 3 moved the function into urllib.parse; the *_to_bytes variant
    # has the same bytes-in/bytes-out behaviour.
    from urllib.parse import unquote_to_bytes as unquote_bytes

  try:
    return unquote_bytes(encoded_line).decode('utf-8')
  except UnicodeDecodeError:
    # %r keeps the warning itself from failing on undecodable input.
    logging.warning(u'Unable to decode line: %r', line)
    return line
class FilterClass(object):
  """Callbacks that extract the search term from the URL of a matched event.

  Every public method is named after an entry in FILTERS, takes a URL and
  returns the search term found in it, or None (in some cases an empty
  string) when no term could be extracted.
  """

  @classmethod
  def _GetParameterValue(cls, string, parameter):
    """Returns the value of a 'parameter=' assignment inside a URL.

    An occurrence at the start of the string or directly after '?', '&' or
    '#' is preferred, so that a lookup of 'q' is not satisfied by 'oq=' or
    'aq='. If there is no such occurrence the first 'parameter=' substring
    is used instead.

    Args:
      string: The URL to search.
      parameter: The parameter name, without the trailing '='.

    Returns:
      The text between 'parameter=' and the next '&', cut at the first
      whitespace. An empty string if that text is empty or consists only of
      whitespace. None if 'parameter=' does not appear in string.
    """
    marker = parameter + '='
    selected = None

    offset = string.find(marker)
    while offset != -1:
      value, _, _ = string[offset + len(marker):].partition('&')
      if offset == 0 or string[offset - 1] in '?&#':
        selected = value
        break
      if selected is None:
        # Remember the first loose match in case no delimited one exists.
        selected = value
      offset = string.find(marker, offset + 1)

    if selected is None:
      return None

    # split()[0] on a whitespace-only value would raise an IndexError.
    tokens = selected.split()
    return tokens[0] if tokens else ''

  @classmethod
  def _GetBetweenQEqualsAndAmbersand(cls, string):
    """Returns the string that is defined between 'q=' and '&'.

    Args:
      string: The URL to search.

    Returns:
      The value of the q parameter (an empty string if it has no value), or
      string itself if it does not contain 'q=' at all.
    """
    if 'q=' not in string:
      return string
    return cls._GetParameterValue(string, 'q')

  @classmethod
  def _SearchAndQInLine(cls, string):
    """Returns a bool indicating if both 'q=' and 'search' are in string."""
    return 'search' in string and 'q=' in string

  @classmethod
  def GoogleSearch(cls, url):
    """Returns the search term of a Google search URL, or None."""
    if not cls._SearchAndQInLine(url):
      return None

    line = cls._GetBetweenQEqualsAndAmbersand(url)
    if not line:
      return None
    return line.replace('+', ' ')

  @classmethod
  def YouTube(cls, url):
    """Returns the search term of a YouTube URL, or None."""
    # NOTE(review): YouTube result pages appear to carry the term in
    # 'search_query=', which GenericSearch does not look at - confirm.
    return cls.GenericSearch(url)

  @classmethod
  def BingSearch(cls, url):
    """Returns the search term of a Bing search URL, or None."""
    return cls.GenericSearch(url)

  @classmethod
  def GenericSearch(cls, url):
    """Returns the search term of a generic '...search...q=' URL.

    Args:
      url: The URL to search.

    Returns:
      The value of the q parameter with '+' replaced by a space (possibly an
      empty string), or None if url lacks 'search' or 'q='.
    """
    if not cls._SearchAndQInLine(url):
      return None
    return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')

  @classmethod
  def Yandex(cls, url):
    """Returns the search term ('text=' parameter) of a Yandex URL, or None."""
    value = cls._GetParameterValue(url, 'text')
    if not value:
      return None
    return value.replace('+', ' ')

  @classmethod
  def DuckDuckGo(cls, url):
    """Returns the search term of a DuckDuckGo URL, or None."""
    if 'q=' not in url:
      return None
    return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')

  @classmethod
  def Gmail(cls, url):
    """Returns the term of a Gmail 'search/<term>' URL, or None.

    The term ends at the next '/' or at '?compose', whichever comes first.
    """
    if 'search/' not in url:
      return None

    _, _, line = url.partition('search/')
    first, _, _ = line.partition('/')
    second, _, _ = first.partition('?compose')
    return second.replace('+', ' ')
def Main():
  """Extracts search engine history from a plaso storage file.

  Parses the command line, matches every event of the storage file against
  the filters in FILTERS and writes, for each search engine, every distinct
  search term preceded by the number of times it was seen, most frequent
  first.

  Raises:
    RuntimeError: if the storage file does not exist.
  """
  parser = argparse.ArgumentParser(
      description=(
          'plaso_extract_search_history is a simple script that reads the '
          'content of a plaso storage file and tries to extract known search '
          'engine history from it'))
  parser.add_argument(
      '-w', '--write', metavar='FILENAME', action='store', dest='output_file',
      default='', help='Write results to a file.')
  parser.add_argument(
      'filename', action='store', metavar='STORAGE_FILE', help=(
          'The path to the plaso storage file.'))
  options = parser.parse_args()

  # An ASCII locale is replaced by UTF-8 for the output encoding.
  encoding = locale.getpreferredencoding()
  if encoding.lower() == 'ascii':
    encoding = 'utf-8'

  storage_path = options.filename
  if not os.path.isfile(storage_path):
    raise RuntimeError(u'File {} does not exist'.format(storage_path))

  writer = output.OutputFilehandle(encoding)
  if options.output_file:
    writer.Open(path=options.output_file)
  else:
    writer.Open(sys.stdout)

  # terms_per_engine maps a callback name to its distinct terms in order of
  # first appearance; hit_counts maps u'<callback name>:<term>' to a count.
  terms_per_engine = {}
  hit_counts = {}

  # Compile the filters; every callback name gets a result list even when
  # its filter or callback is unavailable.
  matchers = {}
  for filter_string, callback_name in FILTERS:
    matcher = filters.GetFilter(filter_string)
    extractor = getattr(FilterClass, callback_name, None)
    terms_per_engine[callback_name] = []
    if not (matcher and extractor):
      continue
    matchers[matcher] = (callback_name, extractor)

  with storage.StorageFile(storage_path, read_only=True) as store:
    event_object = store.GetSortedEntry()
    while event_object:
      for matcher, (callback_name, extractor) in matchers.items():
        if not matcher.Match(event_object):
          continue

        url = getattr(event_object, 'url', None)
        if not url:
          continue

        term = ScrubLine(extractor(url))
        if not term:
          continue

        known_terms = terms_per_engine[callback_name]
        if term in known_terms:
          hit_counts[u'{}:{}'.format(callback_name, term)] += 1
          continue

        known_terms.append(term)
        hit_counts[u'{}:{}'.format(callback_name, term)] = 1

      event_object = store.GetSortedEntry()

  for engine_name, terms in terms_per_engine.items():
    counted_terms = [
        (hit_counts[u'{}:{}'.format(engine_name, term)], term)
        for term in terms]

    writer.WriteLine(u' == ENGINE: {0:s} ==\n'.format(engine_name))

    # Highest count first; equal counts fall back to reverse term order.
    counted_terms.sort(reverse=True)
    for count, term in counted_terms:
      writer.WriteLine(u'{} {}\n'.format(count, term))
    writer.WriteLine('\n')


if __name__ == '__main__':
  Main()
+254
View File
@@ -0,0 +1,254 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A simple tool that provides an overview of running log2timeline processes.
The tool iterates over all processes running on the system looking for ones
running log2timeline. If it finds any, it will print out information detected
from each process.
There is also an option to drop into an IPython shell to further interact with
the process, giving the user the option to for instance terminate processes
that are in a zombie state.
"""
import argparse
import IPython
import sys
import textwrap
import psutil
from plaso.frontend import frontend
from plaso.multi_processing import process_info
def IsWorkerProcess(process):
  """Determines whether a process is a log2timeline worker.

  A process counts as a worker when the name of its parent contains
  'log2timeline' and GetProcessStatus() returns a non-empty status.

  Args:
    process: A process object (instance of ProcessInfo).

  Returns:
    A boolean value indicating whether or not the process is a worker.
  """
  # A worker is always spawned by log2timeline itself.
  if 'log2timeline' not in process.parent.name:
    return False

  # A responding RPC server is currently the only positive evidence.
  # TODO: Add additional tests to verify this is a worker for the case
  # where the RPC server is not working, perhaps look at libraries
  # loaded, etc.
  return bool(process.GetProcessStatus())
class ProcessInformationFrontend(frontend.Frontend):
  """Frontend that lists, inspects and terminates log2timeline processes."""

  def __init__(self):
    """Initializes the frontend with a stdin reader and a stdout writer."""
    # Processes with 'log2timeline' in their command line, and the subset
    # of those that have children but no log2timeline parent.
    self._process_list = []
    self._parent_list = []

    self._input_reader = frontend.StdinFrontendInputReader()
    self._output_writer = frontend.StdoutFrontendOutputWriter()
    super(ProcessInformationFrontend, self).__init__(
        self._input_reader, self._output_writer)

  def PrintRPCDetails(self, process):
    """Prints the RPC status reported by a process.

    Args:
      process: A process object (instance of ProcessInfo).
    """
    write = self._output_writer.Write
    write(u'RPC Status:\n')

    rpc_status = process.GetProcessStatus()
    if not rpc_status:
      write(u'\tNo RPC client listening.\n')
      return

    for key, value in rpc_status.iteritems():
      write(u'\t{0:s} = {1!s}\n'.format(key, value))

  def PrintProcessDetails(self, process):
    """Prints detailed information about a running process.

    Args:
      process: A process object (instance of ProcessInfo).
    """
    write = self._output_writer.Write
    mem_info = process.GetMemoryInformation()

    # Banner with the process name and PID.
    self.PrintSeparatorLine()
    write(u'\n{0:20s}{1:s} [{2:d}]\n'.format(u'', process.name, process.pid))
    self.PrintSeparatorLine()

    self.PrintHeader(u'Basic Information')
    write(u'Name:\n\t{0:s}\n'.format(process.name))
    write(u'PID:\n\t{0:d}\n'.format(process.pid))
    write(u'Command Line:\n\t{0:s}\n'.format(process.command_line))
    write(u'Process Alive:\n\t{0!s}\n'.format(process.IsAlive()))
    write(u'Process Status:\n\t{0:s}\n'.format(process.status))

    is_a_worker = IsWorkerProcess(process)
    write(
        u'This is a worker thread.\n' if is_a_worker
        else u'This is NOT a worker.\n')
    write(u'\n')

    self.PrintHeader(u' * Additional Information')
    write(u'Parent PID:\n\t{0:d} ({1:s})\n'.format(
        process.parent.pid, process.parent.name))

    write(u'Children:\n')
    for child in process.children:
      write(u'\t{0:d} [{1:s}]\n'.format(child.pid, child.name))

    # RPC details are only printed for workers.
    if is_a_worker:
      self.PrintRPCDetails(process)

    write('Nr. of Threads:\n\t{0:d}\n'.format(process.number_of_threads))

    write('Open files:\n')
    for open_file in process.open_files:
      write(u'\t{0:s}\n'.format(open_file))

    write(u'Memory:\n')
    # The names of the fields of the memory object are only available
    # through a protected attribute.
    # pylint: disable=protected-access
    for field in mem_info._fields:
      field_value = getattr(mem_info, field, u'')
      write(u'\t{0:s} = {1!s}\n'.format(field, field_value))

    write('Memory map: \n')
    for memory_map in process.memory_map:
      write(u'\t{0:s}\n'.format(memory_map.path))

  def BuildProcessList(self):
    """Collects every process with 'log2timeline' in its command line.

    Matching processes are appended to the process list. Those that have
    children and whose parent name lacks 'log2timeline' are also appended
    to the parent list. Processes whose command line cannot be read because
    access is denied are skipped.
    """
    for process_object in psutil.get_process_list():
      # TODO: This may catch other processes, such as "vim
      # foo/log2timeline/foo.py" since that's in the command line. However the
      # python log2timeline.py will cause the older approach of name to fail.
      try:
        command_line = u' '.join(process_object.cmdline)
      # pylint: disable=protected-access
      except psutil._error.AccessDenied:
        continue

      if 'log2timeline' not in command_line:
        continue

      process_details = process_info.ProcessInfo(pid=process_object.pid)
      self._process_list.append(process_details)

      parent_process = process_details.parent
      children = list(process_details.children)
      if 'log2timeline' in parent_process.name:
        continue
      if children:
        self._parent_list.append(process_details)

  def TerminateWorkers(self):
    """Terminates every collected process that is a worker."""
    for process_object in self._process_list:
      # Anything that is not recognized as a worker is left alone.
      if not IsWorkerProcess(process_object):
        continue

      self._output_writer.Write(
          u'Killing process: {0:s} [{1:d}] - {2:s}\n'.format(
              process_object.name, process_object.pid,
              process_object.status))
      process_object.TerminateProcess()

  def ListProcesses(self):
    """Prints the main processes followed by details of every process."""
    write = self._output_writer.Write

    if self._parent_list:
      write(u'Main process (careful before killing):\n')
      for parent_process in self._parent_list:
        status = u'Alive' if parent_process.IsAlive() else u'Dead'
        write((
            u'{4}\n\tPid: {1:d}\n\tCommand Line: {0:s}\n\tStatus:{2} '
            u'<{3:s}>\n{4:s}\n').format(
                parent_process.command_line, parent_process.pid,
                status, parent_process.status, u'-'*40))
      # NOTE(review): nesting of this blank line was reconstructed; it is
      # assumed to close the main process section - confirm.
      write(u'\n')

    if not self._process_list:
      write(
          u'No processes discovered. Are you sure log2timeline is running?\n')
      return

    banner_line = u'='*80
    write(banner_line)
    write(u'\n\t\tDiscovered Processes\n')
    write(banner_line)
    write(u'\n')

    for process_object in self._process_list:
      self.PrintProcessDetails(process_object)
def Main():
  """Parses the command line and runs the requested action.

  Depending on the options this opens an IPython console, terminates the
  worker processes or lists the discovered processes.

  Returns:
    True, in all cases.
  """
  front_end = ProcessInformationFrontend()

  description = (
      u'A simple tool that tries to list up all processes that belong to '
      u'log2timeline. Once a process is detected it will print out '
      u'statistical information about it, as well as providing an option '
      u'to attempt to "kill" worker threads.')
  arg_parser = argparse.ArgumentParser(
      description=textwrap.dedent(description))

  arg_parser.add_argument(
      '-c', '--console', dest='console', action='store_true', default=False,
      help=u'Open up an IPython console.')

  arg_parser.add_argument(
      '-k', '--kill-workers', '--kill_workers', dest='kill_workers',
      action='store_true', default=False, help=(
          u'The tool does a rudimentary check to discover worker threads '
          u'and terminates those it finds. This can be used in the case '
          u'where the tool is stuck due to a non-functioning worker that '
          u'prevents the tool from completing it\'s processing.'))

  # TODO: Add an option to specify certain parent if we are killing workers.
  options = arg_parser.parse_args()

  front_end.BuildProcessList()

  # The console takes precedence over the other options. The local names
  # above are kept as-is since the embedded console presumably exposes them.
  if options.console:
    IPython.embed()
  elif options.kill_workers:
    front_end.TerminateWorkers()
  else:
    front_end.ListProcesses()

  return True


if __name__ == '__main__':
  # Exit status 0 when Main() reports success, 1 otherwise.
  sys.exit(0 if Main() else 1)