701 lines
25 KiB
Python
701 lines
25 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2014 The Plaso Project Authors.
|
|
# Please see the AUTHORS file for details on individual authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""The multi-process processing engine."""
|
|
|
|
import ctypes
|
|
import logging
|
|
import multiprocessing
|
|
import os
|
|
import signal
|
|
import sys
|
|
import threading
|
|
|
|
from plaso.engine import collector
|
|
from plaso.engine import engine
|
|
from plaso.engine import queue
|
|
from plaso.engine import worker
|
|
from plaso.lib import errors
|
|
from plaso.multi_processing import foreman
|
|
from plaso.multi_processing import rpc_proxy
|
|
from plaso.parsers import context as parsers_context
|
|
|
|
|
|
def SigKill(pid):
  """Convenience function to issue a SIGKILL or equivalent.

  On Windows there is no SIGKILL, so the process is terminated through
  the Win32 API instead. On other platforms a SIGKILL is sent directly.

  Args:
    pid: The process identifier.
  """
  if not sys.platform.startswith('win'):
    try:
      os.kill(pid, signal.SIGKILL)
    except OSError as exception:
      logging.error(
          u'Unable to kill process {0:d} with error: {1:s}'.format(
              pid, exception))
    return

  # Emulate SIGKILL on Windows: open a handle with terminate access,
  # terminate the process and release the handle again.
  process_terminate = 1
  kernel32 = ctypes.windll.kernel32
  process_handle = kernel32.OpenProcess(process_terminate, False, pid)
  kernel32.TerminateProcess(process_handle, -1)
  kernel32.CloseHandle(process_handle)
|
|
|
|
|
|
class MultiProcessEngine(engine.BaseEngine):
  """Class that defines the multi-process engine.

  The engine spawns a storage process, optionally a collection and a
  foreman process, and a pool of extraction worker processes that
  communicate through multi-processing queues.
  """

  # Lower and upper bounds on the number of worker processes that are
  # started when the caller does not specify a count.
  _WORKER_PROCESSES_MINIMUM = 2
  _WORKER_PROCESSES_MAXIMUM = 15

  def __init__(self, maximum_number_of_queued_items=0):
    """Initialize the multi-process engine object.

    Args:
      maximum_number_of_queued_items: The maximum number of queued items.
                                      The default is 0, which represents
                                      no limit.
    """
    collection_queue = MultiProcessingQueue(
        maximum_number_of_queued_items=maximum_number_of_queued_items)
    storage_queue = MultiProcessingQueue(
        maximum_number_of_queued_items=maximum_number_of_queued_items)
    parse_error_queue = MultiProcessingQueue(
        maximum_number_of_queued_items=maximum_number_of_queued_items)

    super(MultiProcessEngine, self).__init__(
        collection_queue, storage_queue, parse_error_queue)

    self._collection_process = None
    self._foreman_object = None
    self._storage_process = None

    # TODO: turn into a process pool.
    self._worker_processes = {}

    # Attributes for RPC proxy server thread.
    self._proxy_thread = None
    self._rpc_proxy_server = None
    self._rpc_port_number = 0

  def _StartRPCProxyServerThread(self, foreman_object):
    """Starts the RPC proxy server thread.

    Args:
      foreman_object: a foreman object (instance of Foreman).
    """
    if self._rpc_proxy_server or self._proxy_thread:
      return

    self._rpc_proxy_server = rpc_proxy.StandardRpcProxyServer(os.getpid())

    try:
      self._rpc_proxy_server.Open()
      self._rpc_proxy_server.RegisterFunction(
          'signal_end_of_collection', foreman_object.SignalEndOfProcessing)

      self._proxy_thread = threading.Thread(
          name='rpc_proxy', target=self._rpc_proxy_server.StartProxy)
      self._proxy_thread.start()

      # The port is only known after the server has been opened, workers
      # and the collection process connect back to it via this number.
      self._rpc_port_number = self._rpc_proxy_server.listening_port

    except errors.ProxyFailedToStart as exception:
      logging.error((
          u'Unable to setup a RPC server for the engine with error '
          u'{0:s}').format(exception))

  def _StopRPCProxyServerThread(self):
    """Stops the RPC proxy server thread."""
    if not self._rpc_proxy_server or not self._proxy_thread:
      return

    # Close the proxy, free up resources so we can shut down the thread.
    self._rpc_proxy_server.Close()

    if self._proxy_thread.isAlive():
      self._proxy_thread.join()

    self._proxy_thread = None
    self._rpc_proxy_server = None
    self._rpc_port_number = 0

  def CreateCollector(
      self, include_directory_stat, vss_stores=None, filter_find_specs=None,
      resolver_context=None):
    """Creates a collector object.

    The collector discovers all the files that need to be processed by
    the workers. Once a file is discovered it is added to the process queue
    as a path specification (instance of dfvfs.PathSpec).

    Args:
      include_directory_stat: Boolean value to indicate whether directory
                              stat information should be collected.
      vss_stores: Optional list of VSS stores to include in the collection,
                  where 1 represents the first store. Set to None if no
                  VSS stores should be processed. The default is None.
      filter_find_specs: Optional list of filter find specifications (instances
                         of dfvfs.FindSpec). The default is None.
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None. Note that every thread or process
                        must have its own resolver context.

    Returns:
      A collector object (instance of Collector).

    Raises:
      RuntimeError: if source path specification is not set.
    """
    if not self._source_path_spec:
      raise RuntimeError(u'Missing source.')

    collector_object = collector.Collector(
        self._collection_queue, self._source, self._source_path_spec,
        resolver_context=resolver_context)

    collector_object.SetCollectDirectoryMetadata(include_directory_stat)

    if vss_stores:
      collector_object.SetVssInformation(vss_stores)

    if filter_find_specs:
      collector_object.SetFilter(filter_find_specs)

    return collector_object

  def CreateExtractionWorker(self, worker_number):
    """Creates an extraction worker object.

    Args:
      worker_number: A number that identifies the worker.

    Returns:
      An extraction worker (instance of worker.ExtractionWorker).
    """
    parser_context = parsers_context.ParserContext(
        self._event_queue_producer, self._parse_error_queue_producer,
        self.knowledge_base)

    extraction_worker = worker.BaseEventExtractionWorker(
        worker_number, self._collection_queue, self._event_queue_producer,
        self._parse_error_queue_producer, parser_context)

    extraction_worker.SetEnableDebugOutput(self._enable_debug_output)

    # TODO: move profiler in separate object.
    extraction_worker.SetEnableProfiling(
        self._enable_profiling,
        profiling_sample_rate=self._profiling_sample_rate)

    if self._open_files:
      extraction_worker.SetOpenFiles(self._open_files)

    if self._filter_object:
      extraction_worker.SetFilterObject(self._filter_object)

    if self._mount_path:
      extraction_worker.SetMountPath(self._mount_path)

    if self._text_prepend:
      extraction_worker.SetTextPrepend(self._text_prepend)

    return extraction_worker

  def ProcessSource(
      self, collector_object, storage_writer, parser_filter_string=None,
      number_of_extraction_workers=0, have_collection_process=True,
      have_foreman_process=True, show_memory_usage=False):
    """Processes the source and extracts event objects.

    Args:
      collector_object: A collector object (instance of Collector).
      storage_writer: A storage writer object (instance of BaseStorageWriter).
      parser_filter_string: Optional parser filter string. The default is None.
      number_of_extraction_workers: Optional number of extraction worker
                                    processes. The default is 0 which means
                                    the function will determine the suitable
                                    number.
      have_collection_process: Optional boolean value to indicate a separate
                               collection process should be run. The default
                               is true.
      have_foreman_process: Optional boolean value to indicate a separate
                            foreman process should be run to make sure the
                            workers are extracting event objects. The default
                            is true.
      show_memory_usage: Optional boolean value to indicate memory information
                         should be included in logging. The default is false.
    """
    if number_of_extraction_workers < 1:
      # One worker for each "available" CPU (minus other processes).
      # The number here is derived from the fact that the engine starts up:
      # + A collector process (optional).
      # + A storage process.
      #
      # If we want to utilize all CPUs on the system we therefore need to start
      # up workers that amounts to the total number of CPUs - the other
      # processes.
      cpu_count = multiprocessing.cpu_count() - 2
      if have_collection_process:
        cpu_count -= 1

      if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
        cpu_count = self._WORKER_PROCESSES_MAXIMUM

      number_of_extraction_workers = cpu_count

    if have_foreman_process:
      self._foreman_object = foreman.Foreman(
          show_memory_usage=show_memory_usage)
      self._StartRPCProxyServerThread(self._foreman_object)

    self._storage_process = MultiProcessStorageProcess(
        storage_writer, name='StorageProcess')
    self._storage_process.start()

    if have_collection_process:
      self._collection_process = MultiProcessCollectionProcess(
          collector_object, self._rpc_port_number, name='CollectionProcess')
      self._collection_process.start()

    logging.info(u'Starting extraction worker processes.')
    for worker_number in range(number_of_extraction_workers):
      extraction_worker = self.CreateExtractionWorker(worker_number)

      worker_name = u'Worker_{0:d}'.format(worker_number)

      # TODO: Test to see if a process pool can be a better choice.
      worker_process = MultiProcessEventExtractionWorkerProcess(
          extraction_worker, parser_filter_string, name=worker_name)
      worker_process.start()

      if self._foreman_object:
        self._foreman_object.MonitorWorker(
            pid=worker_process.pid, name=worker_name)

      self._worker_processes[worker_name] = worker_process

    logging.debug(u'Collection started.')
    if not self._collection_process:
      # No separate collection process: collect in-process.
      collector_object.Collect()

    else:
      while self._collection_process.is_alive():
        self._collection_process.join(timeout=10)

        # Check the worker status regularly while collection is still ongoing.
        if self._foreman_object:
          self._foreman_object.CheckStatus()

        # TODO: We get a signal when collection is done, which might happen
        # before the collection thread joins. Look at the option of speeding
        # up the process of the collector stopping by potentially killing it.

    logging.info(u'Collection stopped.')

    self._StopProcessing()

  def _StopProcessing(self):
    """Stops the foreman and worker processes."""
    if self._foreman_object:
      self._foreman_object.SignalEndOfProcessing()
      self._StopRPCProxyServerThread()

    # Run through the running workers, one by one.
    # This will go through a list of all active worker processes and check it's
    # status. If a worker has completed it will be removed from the list.
    # The process will not wait longer than five seconds for each worker to
    # complete, if longer time passes it will simply check it's status and
    # move on. That ensures that worker process is monitored and status is
    # updated.
    while self._worker_processes:
      # Note that self._worker_processes is altered in this loop hence we need
      # it to be sorted.
      for process_name, process_obj in sorted(self._worker_processes.items()):
        if self._foreman_object:
          worker_label = self._foreman_object.GetLabel(
              name=process_name, pid=process_obj.pid)
        else:
          worker_label = None

        if not worker_label:
          if process_obj.is_alive():
            logging.info((
                u'Process {0:s} [{1:d}] is not monitored by the foreman. Most '
                u'likely due to a worker having completed it\'s processing '
                u'while waiting for another worker to complete.').format(
                    process_name, process_obj.pid))
            logging.info(
                u'Waiting for worker {0:s} to complete.'.format(process_name))
            process_obj.join()
            logging.info(u'Worker: {0:s} [{1:d}] has completed.'.format(
                process_name, process_obj.pid))

          del self._worker_processes[process_name]
          continue

        if process_obj.is_alive():
          # Check status of worker.
          self._foreman_object.CheckStatus(label=worker_label)
          process_obj.join(timeout=5)

        # Note that we explicitly must test against exitcode 0 here since
        # process.exitcode will be None if there is no exitcode.
        elif process_obj.exitcode != 0:
          logging.warning((
              u'Worker process: {0:s} already exited with code: '
              u'{1:d}.').format(process_name, process_obj.exitcode))
          process_obj.terminate()
          self._foreman_object.TerminateProcess(label=worker_label)

        else:
          # Process is no longer alive, no need to monitor.
          self._foreman_object.StopMonitoringWorker(label=worker_label)
          # Remove it from our list of active workers.
          del self._worker_processes[process_name]

    if self._foreman_object:
      self._foreman_object = None

    logging.info(u'Extraction workers stopped.')
    self._event_queue_producer.SignalEndOfInput()

    self._storage_process.join()
    logging.info(u'Storage writer stopped.')

  def _AbortNormal(self, timeout=None):
    """Abort in a normal way.

    Args:
      timeout: The process join timeout. The default is None meaning
               no timeout.
    """
    if self._collection_process:
      logging.warning(u'Signaling collection process to abort.')
      self._collection_process.SignalAbort()

    if self._worker_processes:
      logging.warning(u'Signaling worker processes to abort.')
      for _, worker_process in self._worker_processes.iteritems():
        worker_process.SignalAbort()

    logging.warning(u'Signaling storage process to abort.')
    self._event_queue_producer.SignalEndOfInput()
    # Guard against an abort before ProcessSource started the storage
    # process, in which case self._storage_process is still None.
    if self._storage_process:
      self._storage_process.SignalAbort()

    if self._collection_process:
      logging.warning(u'Waiting for collection process: {0:d}.'.format(
          self._collection_process.pid))
      # TODO: it looks like xmlrpclib.ServerProxy is not allowing the
      # collection process to close.
      self._collection_process.join(timeout=timeout)

    if self._worker_processes:
      for worker_name, worker_process in self._worker_processes.iteritems():
        logging.warning(u'Waiting for worker: {0:s} process: {1:d}'.format(
            worker_name, worker_process.pid))
        worker_process.join(timeout=timeout)

    if self._storage_process:
      # Previously this logged self._collection_process.pid which is the
      # wrong process and raises AttributeError when there is no separate
      # collection process.
      logging.warning(u'Waiting for storage process: {0:d}.'.format(
          self._storage_process.pid))
      self._storage_process.join(timeout=timeout)

  def _AbortTerminate(self):
    """Abort processing by sending SIGTERM or equivalent."""
    if self._collection_process and self._collection_process.is_alive():
      logging.warning(u'Terminating collection process: {0:d}.'.format(
          self._collection_process.pid))
      self._collection_process.terminate()

    if self._worker_processes:
      for worker_name, worker_process in self._worker_processes.iteritems():
        if worker_process.is_alive():
          logging.warning(u'Terminating worker: {0:s} process: {1:d}'.format(
              worker_name, worker_process.pid))
          worker_process.terminate()

    if self._storage_process and self._storage_process.is_alive():
      logging.warning(u'Terminating storage process: {0:d}.'.format(
          self._storage_process.pid))
      self._storage_process.terminate()

  def _AbortKill(self):
    """Abort processing by sending SIGKILL or equivalent."""
    if self._collection_process and self._collection_process.is_alive():
      logging.warning(u'Killing collection process: {0:d}.'.format(
          self._collection_process.pid))
      SigKill(self._collection_process.pid)

    if self._worker_processes:
      for worker_name, worker_process in self._worker_processes.iteritems():
        if worker_process.is_alive():
          logging.warning(u'Killing worker: {0:s} process: {1:d}'.format(
              worker_name, worker_process.pid))
          SigKill(worker_process.pid)

    if self._storage_process and self._storage_process.is_alive():
      logging.warning(u'Killing storage process: {0:d}.'.format(
          self._storage_process.pid))
      SigKill(self._storage_process.pid)

  def SignalAbort(self):
    """Signals the engine to abort."""
    super(MultiProcessEngine, self).SignalAbort()

    # Escalate: first a graceful abort with a short timeout, then SIGTERM;
    # a second keyboard interrupt escalates straight to SIGKILL.
    try:
      self._AbortNormal(timeout=2)
      self._AbortTerminate()
    except KeyboardInterrupt:
      self._AbortKill()

    # TODO: remove the need for this.
    # Sometimes the main process will be unresponsive.
    SigKill(os.getpid())
|
|
|
|
|
|
class MultiProcessCollectionProcess(multiprocessing.Process):
  """Class that defines a multi-processing collection process."""

  def __init__(self, collector_object, rpc_port_number, **kwargs):
    """Initializes the process object.

    Args:
      collector_object: A collector object (instance of Collector).
      rpc_port_number: An integer value containing the RPC end point port
                       number or 0 if not set.
    """
    super(MultiProcessCollectionProcess, self).__init__(**kwargs)
    self._collector_object = collector_object
    self._rpc_port_number = rpc_port_number

  # This method part of the multiprocessing.Process interface hence its name
  # is not following the style guide.
  def run(self):
    """The main loop."""
    # Ignore SIGINT in this child process so that an interrupt of the main
    # process does not make the collection process dump a traceback.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    logging.debug(u'Collection process: {0!s} started'.format(self._name))

    # When an RPC end point was handed to us, connect back to the engine so
    # the end of collection can be signaled once collection finishes.
    proxy_client = None
    if self._rpc_port_number:
      try:
        proxy_client = rpc_proxy.StandardRpcProxyClient(self._rpc_port_number)
        proxy_client.Open()

      except errors.ProxyFailedToStart as exception:
        logging.error((
            u'Unable to setup a RPC client for the collector process with '
            u'error {0:s}').format(exception))

    self._collector_object.Collect()

    logging.debug(u'Collection process: {0!s} stopped'.format(self._name))
    if proxy_client:
      _ = proxy_client.GetData(u'signal_end_of_collection')

  def SignalAbort(self):
    """Signals the process to abort."""
    self._collector_object.SignalAbort()
|
|
|
|
|
|
class MultiProcessEventExtractionWorkerProcess(multiprocessing.Process):
  """Class that defines a multi-processing event extraction worker process."""

  def __init__(self, extraction_worker, parser_filter_string, **kwargs):
    """Initializes the process object.

    Args:
      extraction_worker: The extraction worker object (instance of
                         MultiProcessEventExtractionWorker).
      parser_filter_string: Optional parser filter string. The default is None.
    """
    super(MultiProcessEventExtractionWorkerProcess, self).__init__(**kwargs)
    self._extraction_worker = extraction_worker

    # TODO: clean this up with the implementation of a task based
    # multi-processing approach.
    self._parser_filter_string = parser_filter_string

    # Attributes for RPC proxy server thread.
    self._proxy_thread = None
    self._rpc_proxy_server = None

  def _StartRPCProxyServerThread(self):
    """Starts the RPC proxy server thread.

    The RPC server exposes a 'status' function backed by the extraction
    worker so the foreman can poll this process. A no-op if the server or
    its thread already exists.
    """
    if self._rpc_proxy_server or self._proxy_thread:
      return

    # Set up a simple XML RPC server for the worker for status indications.
    # Since we don't know the worker's PID for now we'll set the initial port
    # number to zero and then adjust it later.
    self._rpc_proxy_server = rpc_proxy.StandardRpcProxyServer()

    try:
      # The PID of the (forked) worker process is used as the port number.
      self._rpc_proxy_server.SetListeningPort(os.getpid())
      self._rpc_proxy_server.Open()
      self._rpc_proxy_server.RegisterFunction(
          'status', self._extraction_worker.GetStatus)

      # Serve the proxy from a daemon-less helper thread so run() can
      # continue with the extraction work.
      self._proxy_thread = threading.Thread(
          name='rpc_proxy', target=self._rpc_proxy_server.StartProxy)
      self._proxy_thread.start()

    except errors.ProxyFailedToStart as exception:
      # NOTE(review): multiprocessing.Process does not appear to define an
      # attribute named _identifier; this error path likely raises
      # AttributeError — confirm against the multiprocessing version in use.
      logging.error((
          u'Unable to setup a RPC server for the worker: {0:d} [PID {1:d}] '
          u'with error: {2:s}').format(
              self._identifier, os.getpid(), exception))

  def _StopRPCProxyServerThread(self):
    """Stops the RPC proxy server thread.

    A no-op when the server or its thread was never started.
    """
    if not self._rpc_proxy_server or not self._proxy_thread:
      return

    # Close the proxy, free up resources so we can shut down the thread.
    self._rpc_proxy_server.Close()

    if self._proxy_thread.isAlive():
      self._proxy_thread.join()

    self._rpc_proxy_server = None
    self._proxy_thread = None

  # This method part of the multiprocessing.Process interface hence its name
  # is not following the style guide.
  def run(self):
    """The main loop."""
    # Prevent the KeyboardInterrupt being raised inside the worker process.
    # This will prevent a worker process to generate a traceback
    # when interrupted.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # We need to initialize the parser object after the process
    # has forked otherwise on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker.InitalizeParserObjects(
        parser_filter_string=self._parser_filter_string)

    logging.debug(u'Worker process: {0!s} started'.format(self._name))
    self._StartRPCProxyServerThread()

    # Blocks until the extraction worker runs out of queued work.
    self._extraction_worker.Run()

    logging.debug(u'Worker process: {0!s} stopped'.format(self._name))
    self._StopRPCProxyServerThread()

  def SignalAbort(self):
    """Signals the process to abort."""
    self._extraction_worker.SignalAbort()
|
|
|
|
|
|
class MultiProcessStorageProcess(multiprocessing.Process):
  """Class that defines a multi-processing storage process."""

  def __init__(self, storage_writer, **kwargs):
    """Initializes the process object.

    Args:
      storage_writer: A storage writer object (instance of BaseStorageWriter).
    """
    super(MultiProcessStorageProcess, self).__init__(**kwargs)
    self._storage_writer = storage_writer

  # This method part of the multiprocessing.Process interface hence its name
  # is not following the style guide.
  def run(self):
    """The main loop."""
    # Ignore SIGINT in this child process so an interrupt of the main
    # process does not make the storage process dump a traceback.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    logging.debug(u'Storage process: {0!s} started'.format(self._name))

    # Blocks until the storage writer has drained its event queue.
    self._storage_writer.WriteEventObjects()

    logging.debug(u'Storage process: {0!s} stopped'.format(self._name))

  def SignalAbort(self):
    """Signals the process to abort."""
    self._storage_writer.SignalAbort()
|
|
|
|
|
|
class MultiProcessingQueue(queue.Queue):
  """Class that defines the multi-processing queue.

  Wraps multiprocessing.Queue behind the plaso queue interface so the
  queue can be shared between the engine's child processes.
  """

  def __init__(self, maximum_number_of_queued_items=0):
    """Initializes the multi-processing queue object.

    Args:
      maximum_number_of_queued_items: The maximum number of queued items.
                                      The default is 0, which represents
                                      no limit.
    """
    super(MultiProcessingQueue, self).__init__()

    # maxsize contains the maximum number of items allowed to be queued,
    # where 0 represents unlimited.
    # We need to check that we aren't asking for a bigger queue than the
    # platform supports, which requires access to this protected member.
    # pylint: disable=protected-access
    queue_max_length = multiprocessing._multiprocessing.SemLock.SEM_VALUE_MAX
    # pylint: enable=protected-access
    if maximum_number_of_queued_items > queue_max_length:
      # Note: logging.warn is a deprecated alias of logging.warning.
      logging.warning(
          u'Maximum queue size requested ({0:d}) is larger than system '
          u'supported maximum size. Setting queue size to maximum supported '
          u'size, '
          u'({1:d})'.format(maximum_number_of_queued_items, queue_max_length))
      maximum_number_of_queued_items = queue_max_length
    self._queue = multiprocessing.Queue(
        maxsize=maximum_number_of_queued_items)

  def __len__(self):
    """Returns the estimated current number of items in the queue.

    Raises:
      NotImplementedError: on platforms where qsize() is not supported,
                           e.g. Mac OS X with a broken sem_getvalue().
    """
    size = 0
    try:
      size = self._queue.qsize()
    except NotImplementedError:
      logging.warning((
          u'Returning queue length does not work on Mac OS X because of broken '
          u'sem_getvalue()'))
      raise

    return size

  def IsEmpty(self):
    """Determines if the queue is empty."""
    return self._queue.empty()

  def PushItem(self, item):
    """Pushes an item onto the queue."""
    self._queue.put(item)

  def PopItem(self):
    """Pops an item off the queue.

    Raises:
      errors.QueueEmpty: when the blocking get is interrupted, so callers
                         see a regular end-of-queue condition.
    """
    try:
      return self._queue.get()
    except KeyboardInterrupt:
      raise errors.QueueEmpty
|