Import from old repository

2020-04-06 18:48:34 +02:00
commit 0da6783a45
762 changed files with 103065 additions and 0 deletions
@@ -0,0 +1,8 @@
+This folder contains a few extra nuggets: scripts that use the plaso
+libraries, or other files that can be used with plaso. In other words, useful
+tools that use the underlying infrastructure to perform actions that are not
+part of the original design.
+
+There is no formal setup file for any of the scripts here; they are mostly
+provided as a PoC showing what can be done using the plaso libraries
+to extend the tool.
@@ -0,0 +1,17 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
@@ -0,0 +1,242 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Extract search history from a plaso storage file and enjoy a cup of tea.
+
+A very simple script that takes as input a plaso storage file
+and then tries to extract common search engine history from it and spit
+it out to your lovely little screen or a file of your choosing.
+"""
+import argparse
+import locale
+import logging
+import os
+import sys
+import urllib
+
+# pylint: disable=unused-import
+from plaso import filters
+from plaso import formatters
+
+from plaso.lib import output
+from plaso.lib import storage
+
+# Here we define filters and callback methods for all hits on each filter.
+# Each entry is a (filter expression, callback name) pair. The callback name
+# is looked up as an attribute of FilterClass in Main(), and the callback is
+# invoked with the "url" attribute of every event the filter matches.
+# NOTE(review): the dots in the Google iregexp pattern are not escaped, so
+# they presumably match any character rather than a literal "." - confirm
+# against the filter implementation.
+FILTERS = (
+    (('source is "WEBHIST" and url iregexp "(www.|encrypted.|/)google." and '
+      'url contains "search"'), 'GoogleSearch'),
+    ('source is "WEBHIST" and url contains "youtube.com"', 'YouTube'),
+    (('source is "WEBHIST" and url contains "bing.com" and url contains '
+      '"search"'), 'BingSearch'),
+    ('source is "WEBHIST" and url contains "mail.google.com"', 'Gmail'),
+    (('source is "WEBHIST" and url contains "yandex.com" and url contains '
+      '"yandsearch"'), 'Yandex'),
+    ('source is "WEBHIST" and url contains "duckduckgo.com"', 'DuckDuckGo')
+)
+
+
+def ScrubLine(line):
+  """Replace URL percent-escapes (%XX) in a line with the decoded text.
+
+  The line is passed through urllib.unquote and the resulting bytes are
+  decoded as UTF-8. Only percent-encoding is handled here.
+
+  Args:
+    line: The string that we are about to "fix".
+
+  Returns:
+    An empty string if the line is empty or None. The line unchanged if it
+    contains no '%' character, or if the unquoted bytes cannot be decoded as
+    UTF-8 (a warning is logged in that case). Otherwise a unicode string
+    that has its %XX hex codes swapped for text.
+  """
+  if not line:
+    return ''
+
+  # Nothing to unquote, return the line as-is.
+  if not '%' in line:
+    return line
+
+  try:
+    # NOTE(review): under Python 2 str(line) can raise UnicodeEncodeError
+    # when line is a unicode string with non-ASCII characters; only
+    # UnicodeDecodeError is handled below - confirm whether that can occur.
+    return unicode(urllib.unquote(str(line)), 'utf-8')
+  except UnicodeDecodeError:
+    logging.warning(u'Unable to decode line: {0:s}'.format(line))
+
+  # Fall back to the original, still percent-encoded, line.
+  return line
+
+
+class FilterClass(object):
+  """A class that contains all the parser functions.
+
+  Every public classmethod takes a URL string and returns the search term
+  extracted from it, or None when the URL does not look like a search.
+  The method names are the callback names referenced in FILTERS.
+  """
+
+  @classmethod
+  def _GetBetweenQEqualsAndAmbersand(cls, string):
+    """Return the text between the first 'q=' and the following '&'.
+
+    Args:
+      string: The URL (or URL fragment) to extract the value from.
+
+    Returns:
+      The input unchanged if it does not contain 'q='. Otherwise the first
+      whitespace-delimited token of the text between the first 'q=' and the
+      next '&'. If there is nothing between them the whole remainder after
+      'q=' is returned instead.
+    """
+    if 'q=' not in string:
+      return string
+    _, _, line = string.partition('q=')
+    before_and, _, _ = line.partition('&')
+    if not before_and:
+      return line
+    # NOTE(review): if before_and consists only of whitespace, split()
+    # returns an empty list and the index below raises IndexError.
+    return before_and.split()[0]
+
+  @classmethod
+  def _SearchAndQInLine(cls, string):
+    """Return a bool indicating if the words q= and search appear in string."""
+    if 'search' not in string:
+      return False
+
+    if 'q=' not in string:
+      return False
+
+    return True
+
+  @classmethod
+  def GoogleSearch(cls, url):
+    """Extract the search term from a Google search URL.
+
+    Args:
+      url: The URL string.
+
+    Returns:
+      The value of the 'q=' parameter with '+' replaced by spaces, or None
+      if the URL lacks 'search' or 'q=' or the extracted value is empty.
+    """
+    if not cls._SearchAndQInLine(url):
+      return
+
+    line = cls._GetBetweenQEqualsAndAmbersand(url)
+    if not line:
+      return
+
+    return line.replace('+', ' ')
+
+  @classmethod
+  def YouTube(cls, url):
+    """Extract the search term from a YouTube URL, see GenericSearch."""
+    return cls.GenericSearch(url)
+
+  @classmethod
+  def BingSearch(cls, url):
+    """Extract the search term from a Bing URL, see GenericSearch."""
+    return cls.GenericSearch(url)
+
+  @classmethod
+  def GenericSearch(cls, url):
+    """Extract the search term from a generic search engine URL.
+
+    Args:
+      url: The URL string.
+
+    Returns:
+      The value of the 'q=' parameter with '+' replaced by spaces, or None
+      if the URL does not contain both 'search' and 'q='.
+    """
+    if not cls._SearchAndQInLine(url):
+      return
+
+    return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')
+
+  @classmethod
+  def Yandex(cls, url):
+    """Extract the search term from a Yandex search URL.
+
+    Args:
+      url: The URL string.
+
+    Returns:
+      The first whitespace-delimited token of the text between 'text=' and
+      the next '&', with '+' replaced by spaces. None if the URL has no
+      'text=' parameter or the value is empty.
+    """
+    if 'text=' not in url:
+      return
+    _, _, line = url.partition('text=')
+    before_and, _, _ = line.partition('&')
+    if not before_and:
+      return
+    yandex_search_url = before_and.split()[0]
+
+    return yandex_search_url.replace('+', ' ')
+
+  @classmethod
+  def DuckDuckGo(cls, url):
+    """Extract the search term from a DuckDuckGo URL.
+
+    Args:
+      url: The URL string.
+
+    Returns:
+      The value of the 'q=' parameter with '+' replaced by spaces, or None
+      if the URL does not contain 'q='.
+    """
+    if not 'q=' in url:
+      return
+    return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')
+
+  @classmethod
+  def Gmail(cls, url):
+    """Extract the search term from a Gmail search URL.
+
+    Args:
+      url: The URL string.
+
+    Returns:
+      The text that follows 'search/' up to the next '/' and before any
+      '?compose', with '+' replaced by spaces. None if the URL does not
+      contain 'search/'.
+    """
+    if 'search/' not in url:
+      return
+
+    _, _, line = url.partition('search/')
+    first, _, _ = line.partition('/')
+    second, _, _ = first.partition('?compose')
+
+    return second.replace('+', ' ')
+
+
+def Main():
+  """Run the tool.
+
+  Parses the command line arguments, runs every filter in FILTERS against
+  each event read from the storage file and writes, per search engine, the
+  extracted search terms together with the number of times each was seen.
+
+  Raises:
+    RuntimeError: if the storage file does not exist.
+  """
+  arg_parser = argparse.ArgumentParser(
+      description=(
+          'plaso_extract_search_history is a simple script that reads the '
+          'content of a plaso storage file and tries to extract known search '
+          'engine history from it'))
+
+  arg_parser.add_argument(
+      '-w', '--write', metavar='FILENAME', action='store', dest='output_file',
+      default='', help='Write results to a file.')
+
+  arg_parser.add_argument(
+      'filename', action='store', metavar='STORAGE_FILE', help=(
+          'The path to the plaso storage file.'))
+
+  options = arg_parser.parse_args()
+  # Use UTF-8 for the output when the locale only offers plain ASCII.
+  preferred_encoding = locale.getpreferredencoding()
+  if preferred_encoding.lower() == 'ascii':
+    preferred_encoding = 'utf-8'
+
+  if not os.path.isfile(options.filename):
+    raise RuntimeError(u'File {} does not exist'.format(options.filename))
+
+  # results maps a callback name to the list of unique search terms seen.
+  # result_count maps "<callback name>:<search term>" to the number of hits.
+  results = {}
+  result_count = {}
+
+  # NOTE(review): the output file handle is not explicitly closed anywhere
+  # in this function.
+  output_filehandle = output.OutputFilehandle(preferred_encoding)
+  if options.output_file:
+    output_filehandle.Open(path=options.output_file)
+  else:
+    output_filehandle.Open(sys.stdout)
+
+  # Build filters.
+  filter_dict = {}
+  for filter_str, call_back in FILTERS:
+    filter_obj = filters.GetFilter(filter_str)
+    call_back_obj = getattr(FilterClass, call_back, None)
+    # A result list is created for every entry, so an engine header is
+    # printed even when its filter or callback could not be resolved.
+    results[call_back] = []
+    if filter_obj and call_back_obj:
+      filter_dict[filter_obj] = (call_back, call_back_obj)
+
+  with storage.StorageFile(options.filename, read_only=True) as store:
+    event_object = store.GetSortedEntry()
+    # The loop ends when GetSortedEntry returns a false value.
+    while event_object:
+      for filter_obj, call_backs in filter_dict.items():
+        call_back_name, call_back_object = call_backs
+        if filter_obj.Match(event_object):
+          url_attribute = getattr(event_object, 'url', None)
+          if not url_attribute:
+            continue
+          ret_line = ScrubLine(call_back_object(url_attribute))
+          if not ret_line:
+            continue
+          # The membership test is a linear scan of the list of terms seen
+          # so far for this engine.
+          if ret_line in results[call_back_name]:
+            result_count[u'{}:{}'.format(call_back_name, ret_line)] += 1
+          else:
+            results[call_back_name].append(ret_line)
+            result_count[u'{}:{}'.format(call_back_name, ret_line)] = 1
+      event_object = store.GetSortedEntry()
+
+  for engine_name, result_list in results.items():
+    results_with_count = []
+    for result in result_list:
+      results_with_count.append((
+          result_count[u'{}:{}'.format(engine_name, result)], result))
+
+    header = u' == ENGINE: {0:s} ==\n'.format(engine_name)
+    output_filehandle.WriteLine(header)
+    # Most frequent search terms first; ties are ordered by the term itself.
+    for count, result in sorted(results_with_count, reverse=True):
+      line = u'{} {}\n'.format(count, result)
+      output_filehandle.WriteLine(line)
+    output_filehandle.WriteLine('\n')
+
+
+if __name__ == '__main__':
+  Main()
@@ -0,0 +1,254 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A simple tool that provides an overview of running log2timeline processes.
+
+The tool iterates over all processes running on the system looking for ones
+running log2timeline. If it finds any, it will print out information detected
+from each process.
+
+There is also an option to drop into an IPython shell to further interact with
+the process, giving the user the option to for instance terminate processes
+that are in a zombie state.
+"""
+
+import argparse
+import IPython
+import sys
+import textwrap
+
+import psutil
+
+from plaso.frontend import frontend
+from plaso.multi_processing import process_info
+
+
+def IsWorkerProcess(process):
+  """Checks whether a process is a worker process.
+
+  A process is considered a worker when the name of its parent contains
+  "log2timeline" and GetProcessStatus() returns a non-empty status.
+
+  Args:
+    process: A process object (instance of ProcessInfo).
+
+  Returns:
+    A boolean value indicating whether or not the process is a worker.
+  """
+  # The parent needs to be log2timeline.
+  # NOTE(review): assumes process.parent is always set - confirm how
+  # ProcessInfo behaves for a process without a parent.
+  if not 'log2timeline' in process.parent.name:
+    return False
+
+  # If it has an active RPC server then we know for sure.
+  rpc_status = process.GetProcessStatus()
+  if rpc_status:
+    return True
+
+  # We still want to continue checking, in case the RPC
+  # server was not working.
+  # TODO: Add additional tests to verify this is a worker,
+  # perhaps look at libraries loaded, etc.
+  return False
+
+
+class ProcessInformationFrontend(frontend.Frontend):
+  """A frontend implementation for the process information tool.
+
+  Collects the processes whose command line mentions log2timeline and can
+  either print details about them or terminate the ones detected as workers.
+  """
+
+  def __init__(self):
+    """Initialize the process information frontend."""
+    self._input_reader = frontend.StdinFrontendInputReader()
+    self._output_writer = frontend.StdoutFrontendOutputWriter()
+    # Matching processes that have children and whose parent is not
+    # log2timeline, filled by BuildProcessList.
+    self._parent_list = []
+    # Every process whose command line contains "log2timeline", filled by
+    # BuildProcessList.
+    self._process_list = []
+
+    super(ProcessInformationFrontend, self).__init__(
+        self._input_reader, self._output_writer)
+
+  def PrintRPCDetails(self, process):
+    """Print the RPC status of a running process.
+
+    Writes one "key = value" line per entry in the status returned by
+    GetProcessStatus(), or a notice when no status is available.
+
+    Args:
+      process: A process object (instance of ProcessInfo).
+    """
+    self._output_writer.Write(u'RPC Status:\n')
+    rpc_status = process.GetProcessStatus()
+    if rpc_status:
+      for key, value in rpc_status.iteritems():
+        self._output_writer.Write(u'\t{0:s} = {1!s}\n'.format(key, value))
+    else:
+      self._output_writer.Write(u'\tNo RPC client listening.\n')
+
+  def PrintProcessDetails(self, process):
+    """Print detailed information about a running process.
+
+    The output covers basic information (name, PID, command line, status),
+    parent and children, RPC status for workers, thread count, open files,
+    memory information and the memory map.
+
+    Args:
+      process: A process object (instance of ProcessInfo).
+    """
+    mem_info = process.GetMemoryInformation()
+
+    self.PrintSeparatorLine()
+    self._output_writer.Write(u'\n{0:20s}{1:s} [{2:d}]\n'.format(
+        u'', process.name, process.pid))
+    self.PrintSeparatorLine()
+    self.PrintHeader(u'Basic Information')
+    self._output_writer.Write(u'Name:\n\t{0:s}\n'.format(process.name))
+    self._output_writer.Write(u'PID:\n\t{0:d}\n'.format(process.pid))
+    self._output_writer.Write(u'Command Line:\n\t{0:s}\n'.format(
+        process.command_line))
+    self._output_writer.Write(u'Process Alive:\n\t{0!s}\n'.format(
+        process.IsAlive()))
+    self._output_writer.Write(u'Process Status:\n\t{0:s}\n'.format(
+        process.status))
+
+    # Determined once and reused below to decide whether to print the RPC
+    # details.
+    is_a_worker = IsWorkerProcess(process)
+    if is_a_worker:
+      self._output_writer.Write(u'This is a worker thread.\n')
+    else:
+      self._output_writer.Write(u'This is NOT a worker.\n')
+
+    self._output_writer.Write(u'\n')
+    self.PrintHeader(u' * Additional Information')
+    self._output_writer.Write(u'Parent PID:\n\t{0:d} ({1:s})\n'.format(
+        process.parent.pid, process.parent.name))
+    self._output_writer.Write(u'Children:\n')
+    for child in process.children:
+      self._output_writer.Write(u'\t{0:d} [{1:s}]\n'.format(
+          child.pid, child.name))
+
+    if is_a_worker:
+      self.PrintRPCDetails(process)
+
+    self._output_writer.Write('Nr. of Threads:\n\t{0:d}\n'.format(
+        process.number_of_threads))
+
+    self._output_writer.Write('Open files:\n')
+    for open_file in process.open_files:
+      self._output_writer.Write(u'\t{0:s}\n'.format(open_file))
+
+    self._output_writer.Write(u'Memory:\n')
+    # We need to access a protected attribute to get the
+    # name of all the fields in the memory object.
+    # pylint: disable=protected-access
+    for field in mem_info._fields:
+      self._output_writer.Write(u'\t{0:s} = {1!s}\n'.format(
+          field, getattr(mem_info, field, u'')))
+
+    self._output_writer.Write('Memory map: \n')
+    for memory_map in process.memory_map:
+      self._output_writer.Write(u'\t{0:s}\n'.format(memory_map.path))
+
+  def BuildProcessList(self):
+    """Build a list of processes.
+
+    Every process whose command line contains "log2timeline" is appended to
+    the process list. Those that have at least one child and whose parent
+    name does not contain "log2timeline" are also appended to the parent
+    list. Processes whose command line cannot be read are skipped.
+    """
+    for process_object in psutil.get_process_list():
+      # TODO: This may catch other processes, such as "vim
+      # foo/log2timeline/foo.py" since that's in the command line. However the
+      # python log2timeline.py will cause the older approach of name to fail.
+      try:
+        command_line = u' '.join(process_object.cmdline)
+      # pylint: disable=protected-access
+      except psutil._error.AccessDenied:
+        continue
+      if 'log2timeline' in command_line:
+        process_details = process_info.ProcessInfo(pid=process_object.pid)
+        self._process_list.append(process_details)
+        parent_process = process_details.parent
+        children = list(process_details.children)
+        # A log2timeline process with children that was not itself started
+        # by log2timeline is treated as a main process.
+        if 'log2timeline' not in parent_process.name and len(children):
+          self._parent_list.append(process_details)
+
+  def TerminateWorkers(self):
+    """Terminate every discovered process that is detected as a worker.
+
+    Iterates over the process list built by BuildProcessList, writes a line
+    for each process for which IsWorkerProcess returns True and then calls
+    TerminateProcess on it.
+    """
+    for process_object in self._process_list:
+      # Find out which process is a worker and which one isn't.
+      if IsWorkerProcess(process_object):
+        self._output_writer.Write(
+            u'Killing process: {0:s} [{1:d}] - {2:s}\n'.format(
+                process_object.name, process_object.pid,
+                process_object.status))
+        process_object.TerminateProcess()
+
+  def ListProcesses(self):
+    """Print an overview of the discovered processes.
+
+    Writes a summary of the main processes first, if there are any, followed
+    by the details of every discovered process. If no processes were
+    discovered a notice is written instead of the details.
+    """
+    if self._parent_list:
+      self._output_writer.Write(u'Main process (careful before killing):\n')
+      for parent_process in self._parent_list:
+        if parent_process.IsAlive():
+          status = u'Alive'
+        else:
+          status = u'Dead'
+
+        # Argument 4 is a 40 character dashed line used both above and
+        # below the summary.
+        self._output_writer.Write((
+            u'{4}\n\tPid: {1:d}\n\tCommand Line: {0:s}\n\tStatus:{2} '
+            u'<{3:s}>\n{4:s}\n').format(
+                parent_process.command_line, parent_process.pid,
+                status, parent_process.status, u'-'*40))
+      self._output_writer.Write(u'\n')
+
+    if not self._process_list:
+      self._output_writer.Write(
+          u'No processes discovered. Are you sure log2timeline is running?\n')
+      return
+
+    self._output_writer.Write(u'='*80)
+    self._output_writer.Write(u'\n\t\tDiscovered Processes\n')
+    self._output_writer.Write(u'='*80)
+    self._output_writer.Write(u'\n')
+    for process_object in self._process_list:
+      self.PrintProcessDetails(process_object)
+
+
+def Main():
+  """Read parameters and run the tool.
+
+  Builds the list of log2timeline processes and then, depending on the
+  options, opens an IPython console, terminates the worker processes or
+  lists the discovered processes.
+
+  Returns:
+    True. No code path in this function returns another value.
+  """
+  front_end = ProcessInformationFrontend()
+
+  description = (
+      u'A simple tool that tries to list up all processes that belong to '
+      u'log2timeline. Once a process is detected it will print out '
+      u'statistical information about it, as well as providing an option '
+      u'to attempt to "kill" worker threads.')
+  arg_parser = argparse.ArgumentParser(
+      description=textwrap.dedent(description))
+
+  arg_parser.add_argument(
+      '-c', '--console', dest='console', action='store_true', default=False,
+      help=u'Open up an IPython console.')
+
+  arg_parser.add_argument(
+      '-k', '--kill-workers', '--kill_workers', dest='kill_workers',
+      action='store_true', default=False, help=(
+          u'The tool does a rudimentary check to discover worker threads '
+          u'and terminates those it finds. This can be used in the case '
+          u'where the tool is stuck due to a non-functioning worker that '
+          u'prevents the tool from completing it\'s processing.'))
+
+  # TODO: Add an option to specify certain parent if we are killing workers.
+  options = arg_parser.parse_args()
+
+  front_end.BuildProcessList()
+
+  # The console takes precedence: when it is requested the kill and list
+  # actions below are skipped.
+  if options.console:
+    IPython.embed()
+    return True
+
+  if options.kill_workers:
+    front_end.TerminateWorkers()
+  else:
+    front_end.ListProcesses()
+
+  return True
+
+
+# Translate the return value of Main into the process exit code. Main as
+# written always returns True, so the exit code is 0 unless it raises.
+if __name__ == '__main__':
+  if not Main():
+    sys.exit(1)
+  else:
+    sys.exit(0)