Import from old repository
@@ -0,0 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -0,0 +1,202 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The file format classifier."""

# TODO: rewrite most of the classifier in C and integrate with the code in:
# plaso/classifier

import gzip
import logging
import os
import tarfile
import zipfile
import zlib

from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver

from plaso.lib import errors


class Classifier(object):
  """Class that defines the file format classifier."""

  _MAGIC_VALUES = {
      'ZIP': {'length': 4, 'offset': 0, 'values': ['P', 'K', '\x03', '\x04']},
      'TAR': {'length': 5, 'offset': 257, 'values': ['u', 's', 't', 'a', 'r']},
      'GZ': {'length': 2, 'offset': 0, 'values': ['\x1f', '\x8b']},
  }
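
  # To illustrate the check performed in _SmartOpenFile below: read enough
  # bytes to cover the largest offset plus length (257 + 5 for 'TAR') and
  # compare the slice at each offset against the magic bytes. A minimal
  # sketch, using a hypothetical local file:
  #
  #   file_object = open('/tmp/example.bin', 'rb')
  #   header = file_object.read(257 + 5)
  #   file_object.close()
  #   if header[0:4] == ''.join(['P', 'K', '\x03', '\x04']):
  #     print 'ZIP'
  #   elif header[257:257 + 5] == ''.join(['u', 's', 't', 'a', 'r']):
  #     print 'TAR'
  #   elif header[0:2] == ''.join(['\x1f', '\x8b']):
  #     print 'GZ'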

  # TODO: Remove this logic when the classifier is ready.
  # This is only used temporarily until files can be classified.
  magic_max_length = 0

  # Defines the maximum depth into a file (for SmartOpenFiles).
  MAX_FILE_DEPTH = 3

  @classmethod
  def _SmartOpenFile(cls, file_entry):
    """Returns a generator for all path specifications extracted from a file.

    If the file is an archive or a compressed stream, all of its members
    are extracted and included in the processing queue.

    Args:
      file_entry: The file entry object.

    Yields:
      A path specification (instance of dfvfs.PathSpec) of embedded file
      entries.
    """
    file_object = file_entry.GetFileObject()

    # TODO: Remove when classifier gets deployed. Then we
    # call the classifier here and use that for definition (and
    # then we forward the classifier definition in the pathspec
    # protobuf).
    file_object.seek(0, os.SEEK_SET)

    if not cls.magic_max_length:
      for magic_value in cls._MAGIC_VALUES.values():
        cls.magic_max_length = max(
            cls.magic_max_length,
            magic_value['length'] + magic_value['offset'])

    header = file_object.read(cls.magic_max_length)

    file_classification = ''
    # Compare the header bytes at each defined offset against every known
    # magic value. If all of the bytes match, the file format has been
    # identified and we can move on.
    for m_value, m_dict in cls._MAGIC_VALUES.items():
      length = m_dict['length'] + m_dict['offset']
      if len(header) < length:
        continue

      offset = m_dict['offset']
      magic = m_dict['values']

      if header[offset:offset + len(magic)] == ''.join(magic):
        file_classification = m_value
        break

    # TODO: refactor the file type specific code into sub functions.
    if file_classification == 'ZIP':
      try:
        file_object.seek(0, os.SEEK_SET)
        zip_file = zipfile.ZipFile(file_object, 'r')

        # TODO: Make this a more "sane" check, and perhaps not entirely
        # skip the file if it has this particular ending, but for now this
        # both slows the tool down considerably and makes it more unstable.
        # Note that rpartition returns the extension without the leading dot.
        _, _, filename_extension = file_entry.name.rpartition(u'.')

        if filename_extension in [u'jar', u'sym', u'xpi']:
          file_object.close()
          logging.debug(
              u'Unsupported ZIP sub type: {0:s} detected in file: {1:s}'.format(
                  filename_extension, file_entry.path_spec.comparable))
          return

        for info in zip_file.infolist():
          if info.file_size > 0:
            logging.debug(
                u'Including: {0:s} from ZIP into process queue.'.format(
                    info.filename))

            yield path_spec_factory.Factory.NewPathSpec(
                definitions.TYPE_INDICATOR_ZIP, location=info.filename,
                parent=file_entry.path_spec)

      except zipfile.BadZipfile:
        pass

    elif file_classification == 'GZ':
      try:
        type_indicator = file_entry.path_spec.type_indicator
        if type_indicator == definitions.TYPE_INDICATOR_GZIP:
          raise errors.SameFileType

        file_object.seek(0, os.SEEK_SET)
        gzip_file = gzip.GzipFile(fileobj=file_object, mode='rb')
        _ = gzip_file.read(4)
        gzip_file.close()

        logging.debug((
            u'Including: {0:s} as GZIP compressed stream into process '
            u'queue.').format(file_entry.name))

        yield path_spec_factory.Factory.NewPathSpec(
            definitions.TYPE_INDICATOR_GZIP, parent=file_entry.path_spec)

      except (IOError, zlib.error, errors.SameFileType):
        pass

    # TODO: Add BZ2 support.
    elif file_classification == 'TAR':
      try:
        file_object.seek(0, os.SEEK_SET)
        tar_file = tarfile.open(fileobj=file_object, mode='r')

        for name_info in tar_file.getmembers():
          if not name_info.isfile():
            continue

          name = name_info.path
          logging.debug(
              u'Including: {0:s} from TAR into process queue.'.format(name))

          yield path_spec_factory.Factory.NewPathSpec(
              definitions.TYPE_INDICATOR_TAR, location=name,
              parent=file_entry.path_spec)

      except tarfile.ReadError:
        pass

    file_object.close()

  @classmethod
  def SmartOpenFiles(cls, file_entry, depth=0):
    """Generates all file entries that can be extracted from a file.

    Args:
      file_entry: A file entry object.
      depth: Incrementing number that defines the current depth into
             a file (a file inside a ZIP file is depth 1, a file inside
             a tar.gz would be of depth 2).

    Yields:
      A file entry object (instance of dfvfs.FileEntry).
    """
    if depth >= cls.MAX_FILE_DEPTH:
      return

    for path_spec in cls._SmartOpenFile(file_entry):
      sub_file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
      if sub_file_entry is None:
        logging.debug(
            u'Unable to open file: {0:s}'.format(path_spec.comparable))
        continue
      yield sub_file_entry

      for sub_sub_file_entry in cls.SmartOpenFiles(
          sub_file_entry, depth=depth + 1):
        yield sub_sub_file_entry
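

# A minimal usage sketch, assuming dfvfs is installed and that
# u'/tmp/example.tgz' is a hypothetical local archive: resolve an OS path
# specification into a file entry and let the classifier generate the
# embedded file entries (up to MAX_FILE_DEPTH levels deep):
#
#   path_spec = path_spec_factory.Factory.NewPathSpec(
#       definitions.TYPE_INDICATOR_OS, location=u'/tmp/example.tgz')
#   file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
#   for sub_file_entry in Classifier.SmartOpenFiles(file_entry):
#     print sub_file_entry.path_spec.comparable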
@@ -0,0 +1,421 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generic collector that supports both file system and image files."""

import hashlib
import logging
import os

from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.lib import errors as dfvfs_errors
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver as path_spec_resolver

from plaso.engine import queue
from plaso.lib import errors


class Collector(queue.ItemQueueProducer):
  """Class that implements a collector object."""

  def __init__(
      self, process_queue, source_path, source_path_spec,
      resolver_context=None):
    """Initializes the collector object.

    The collector discovers all the files that need to be processed by
    the workers. Once a file is discovered it is added to the process queue
    as a path specification (instance of dfvfs.PathSpec).

    Args:
      process_queue: The process queue (instance of Queue). This queue
                     contains the file entries that need to be processed.
      source_path: Path of the source file or directory.
      source_path_spec: The source path specification (instance of
                        dfvfs.PathSpec) as determined by the file system
                        scanner.
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None.
    """
    super(Collector, self).__init__(process_queue)
    self._filter_find_specs = None
    self._fs_collector = FileSystemCollector(process_queue)
    self._resolver_context = resolver_context
    # TODO: remove the need to pass source_path
    self._source_path = os.path.abspath(source_path)
    self._source_path_spec = source_path_spec
    self._vss_stores = None

  def __enter__(self):
    """Enters a with statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Exits a with statement."""
    return

  def _ProcessImage(self, volume_path_spec, find_specs=None):
    """Processes a volume within a storage media image.

    Args:
      volume_path_spec: The path specification of the volume containing
                        the file system.
      find_specs: Optional list of find specifications (instances of
                  dfvfs.FindSpec). The default is None.
    """
    if find_specs:
      logging.debug(u'Collecting from image file: {0:s} with filter'.format(
          self._source_path))
    else:
      logging.debug(u'Collecting from image file: {0:s}'.format(
          self._source_path))

    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    try:
      file_system = path_spec_resolver.Resolver.OpenFileSystem(
          path_spec, resolver_context=self._resolver_context)
    except IOError as exception:
      logging.error(
          u'Unable to open file system with error: {0:s}'.format(exception))
      return

    try:
      self._fs_collector.Collect(
          file_system, path_spec, find_specs=find_specs)
    except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
      logging.warning(u'{0:s}'.format(exception))

      if find_specs:
        logging.debug(u'Collection from image with filter FAILED.')
      else:
        logging.debug(u'Collection from image FAILED.')
      return

    if self._abort:
      return

    if self._vss_stores:
      self._ProcessVSS(volume_path_spec, find_specs=find_specs)

    if find_specs:
      logging.debug(u'Collection from image with filter COMPLETED.')
    else:
      logging.debug(u'Collection from image COMPLETED.')

  def _ProcessVSS(self, volume_path_spec, find_specs=None):
    """Processes a VSS volume within a storage media image.

    Args:
      volume_path_spec: The path specification of the volume containing
                        the file system.
      find_specs: Optional list of find specifications (instances of
                  dfvfs.FindSpec). The default is None.
    """
    logging.info(u'Processing VSS.')

    vss_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location=u'/',
        parent=volume_path_spec)

    vss_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        vss_path_spec, resolver_context=self._resolver_context)

    number_of_vss = vss_file_entry.number_of_sub_file_entries

    # In plaso, 1 represents the first store index, while in dfvfs and
    # pyvshadow 0 represents the first store index, so 1 is subtracted.
    vss_store_range = [store_nr - 1 for store_nr in self._vss_stores]
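
    # For example, a request for VSS stores [1, 2] maps to pyvshadow store
    # indexes [0, 1]:
    #
    #   >>> [store_nr - 1 for store_nr in [1, 2]]
    #   [0, 1]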

    for store_index in vss_store_range:
      if self._abort:
        return

      if find_specs:
        logging.info((
            u'Collecting from VSS volume: {0:d} out of: {1:d} '
            u'with filter').format(store_index + 1, number_of_vss))
      else:
        logging.info(u'Collecting from VSS volume: {0:d} out of: {1:d}'.format(
            store_index + 1, number_of_vss))

      vss_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_VSHADOW, store_index=store_index,
          parent=volume_path_spec)
      path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
          parent=vss_path_spec)

      file_system = path_spec_resolver.Resolver.OpenFileSystem(
          path_spec, resolver_context=self._resolver_context)

      try:
        self._fs_collector.Collect(
            file_system, path_spec, find_specs=find_specs)
      except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
        logging.warning(u'{0:s}'.format(exception))

        if find_specs:
          logging.debug(
              u'Collection from VSS store: {0:d} with filter FAILED.'.format(
                  store_index + 1))
        else:
          logging.debug(u'Collection from VSS store: {0:d} FAILED.'.format(
              store_index + 1))
        return

      if find_specs:
        logging.debug(
            u'Collection from VSS store: {0:d} with filter COMPLETED.'.format(
                store_index + 1))
      else:
        logging.debug(u'Collection from VSS store: {0:d} COMPLETED.'.format(
            store_index + 1))

  def Collect(self):
    """Collects files from the source."""
    source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        self._source_path_spec, resolver_context=self._resolver_context)

    if not source_file_entry:
      logging.warning(u'No files to collect.')
      self.SignalEndOfInput()
      return

    if (not source_file_entry.IsDirectory() and
        not source_file_entry.IsFile() and
        not source_file_entry.IsDevice()):
      raise errors.CollectorError(
          u'Source path: {0:s} not a device, file or directory.'.format(
              self._source_path))

    type_indicator = self._source_path_spec.type_indicator
    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
      if source_file_entry.IsFile():
        self.ProduceItem(self._source_path_spec)

      else:
        file_system = path_spec_resolver.Resolver.OpenFileSystem(
            self._source_path_spec, resolver_context=self._resolver_context)

        try:
          self._fs_collector.Collect(
              file_system, self._source_path_spec,
              find_specs=self._filter_find_specs)
        except (dfvfs_errors.AccessError,
                dfvfs_errors.BackEndError) as exception:
          logging.warning(u'{0:s}'.format(exception))

    else:
      self._ProcessImage(
          self._source_path_spec.parent, find_specs=self._filter_find_specs)

    self.SignalEndOfInput()

  def SetCollectDirectoryMetadata(self, collect_directory_metadata):
    """Sets the collect directory metadata flag.

    Args:
      collect_directory_metadata: Boolean value to indicate to collect
                                  directory metadata.
    """
    self._fs_collector.SetCollectDirectoryMetadata(collect_directory_metadata)

  def SetFilter(self, filter_find_specs):
    """Sets the collection filter find specifications.

    Args:
      filter_find_specs: List of filter find specifications (instances of
                         dfvfs.FindSpec).
    """
    self._filter_find_specs = filter_find_specs

  def SetVssInformation(self, vss_stores):
    """Sets the Volume Shadow Snapshots (VSS) information.

    This function will enable VSS collection.

    Args:
      vss_stores: The range of VSS stores to include in the collection,
                  where 1 represents the first store.
    """
    self._vss_stores = vss_stores

  def SignalAbort(self):
    """Signals the producer to abort."""
    super(Collector, self).SignalAbort()
    self._fs_collector.SignalAbort()


class FileSystemCollector(queue.ItemQueueProducer):
  """Class that implements a file system collector object."""

  def __init__(self, process_queue):
    """Initializes the collector object.

    The collector discovers all the files that need to be processed by
    the workers. Once a file is discovered it is added to the process queue
    as a path specification (instance of dfvfs.PathSpec).

    Args:
      process_queue: The process queue (instance of Queue). This queue
                     contains the file entries that need to be processed.
    """
    super(FileSystemCollector, self).__init__(process_queue)
    self._collect_directory_metadata = True
    self._duplicate_file_check = False
    self._hashlist = {}

    self.number_of_file_entries = 0

  def __enter__(self):
    """Enters a with statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Exits a with statement."""
    return

  def _CalculateNTFSTimeHash(self, file_entry):
    """Returns a hash value calculated from an NTFS file's metadata.

    Args:
      file_entry: The file entry (instance of TSKFileEntry).

    Returns:
      A hash value (string) that can be used to determine if a file's
      timestamp value has changed.
    """
    stat_object = file_entry.GetStat()
    ret_hash = hashlib.md5()

    ret_hash.update('atime:{0:d}.{1:d}'.format(
        getattr(stat_object, 'atime', 0),
        getattr(stat_object, 'atime_nano', 0)))

    ret_hash.update('crtime:{0:d}.{1:d}'.format(
        getattr(stat_object, 'crtime', 0),
        getattr(stat_object, 'crtime_nano', 0)))

    ret_hash.update('mtime:{0:d}.{1:d}'.format(
        getattr(stat_object, 'mtime', 0),
        getattr(stat_object, 'mtime_nano', 0)))

    ret_hash.update('ctime:{0:d}.{1:d}'.format(
        getattr(stat_object, 'ctime', 0),
        getattr(stat_object, 'ctime_nano', 0)))

    return ret_hash.hexdigest()
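
  # The timestamp hash enables cheap de-duplication across VSS stores: a
  # file whose inode was already seen with an identical set of timestamps
  # can be skipped. A minimal sketch of the check, with a hypothetical
  # hash value:
  #
  #   hashlist = {}
  #   inode, hash_value = 16, 'd41d8cd98f00b204e9800998ecf8427e'
  #   if hash_value not in hashlist.get(inode, []):
  #     hashlist.setdefault(inode, []).append(hash_value)
  #     # ... produce the path specification ...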

  def _ProcessDirectory(self, file_entry):
    """Processes a directory and extracts its metadata if necessary."""
    # Need to do a breadth-first search otherwise we'll hit the Python
    # maximum recursion depth.
    sub_directories = []

    for sub_file_entry in file_entry.sub_file_entries:
      if self._abort:
        return

      try:
        if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink():
          continue
      except dfvfs_errors.BackEndError as exception:
        logging.warning(
            u'Unable to process file: {0:s} with error: {1:s}'.format(
                sub_file_entry.path_spec.comparable.replace(
                    u'\n', u';'), exception))
        continue

      # For TSK-based file entries only, ignore the virtual /$OrphanFiles
      # directory.
      if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK:
        if file_entry.IsRoot() and sub_file_entry.name == u'$OrphanFiles':
          continue

      if sub_file_entry.IsDirectory():
        # This check is here to improve performance by not producing
        # path specifications that don't get processed.
        if self._collect_directory_metadata:
          self.ProduceItem(sub_file_entry.path_spec)
          self.number_of_file_entries += 1

        sub_directories.append(sub_file_entry)

      elif sub_file_entry.IsFile():
        # If we are dealing with a VSS we want to calculate a hash value
        # based on the available timestamps, compare it to previously
        # calculated hash values, and only include the file in the queue
        # if the hash does not match.
        if self._duplicate_file_check:
          hash_value = self._CalculateNTFSTimeHash(sub_file_entry)

          inode = getattr(sub_file_entry.path_spec, 'inode', 0)
          if inode in self._hashlist:
            if hash_value in self._hashlist[inode]:
              continue

          self._hashlist.setdefault(inode, []).append(hash_value)

        self.ProduceItem(sub_file_entry.path_spec)
        self.number_of_file_entries += 1

    for sub_file_entry in sub_directories:
      if self._abort:
        return

      try:
        self._ProcessDirectory(sub_file_entry)
      except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
        logging.warning(u'{0:s}'.format(exception))

  def Collect(self, file_system, path_spec, find_specs=None):
    """Collects files from the file system.

    Args:
      file_system: The file system (instance of dfvfs.FileSystem).
      path_spec: The path specification (instance of dfvfs.PathSpec).
      find_specs: Optional list of find specifications (instances of
                  dfvfs.FindSpec). The default is None.
    """
    if find_specs:
      searcher = file_system_searcher.FileSystemSearcher(
          file_system, path_spec)

      for path_spec in searcher.Find(find_specs=find_specs):
        if self._abort:
          return

        self.ProduceItem(path_spec)
        self.number_of_file_entries += 1

    else:
      file_entry = file_system.GetFileEntryByPathSpec(path_spec)

      self._ProcessDirectory(file_entry)

  def SetCollectDirectoryMetadata(self, collect_directory_metadata):
    """Sets the collect directory metadata flag.

    Args:
      collect_directory_metadata: Boolean value to indicate to collect
                                  directory metadata.
    """
    self._collect_directory_metadata = collect_directory_metadata
@@ -0,0 +1,354 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The unit tests for the generic collector object."""

import logging
import os
import shutil
import tempfile
import unittest

from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context
from dfvfs.resolver import resolver as path_spec_resolver

from plaso.engine import collector
from plaso.engine import queue
from plaso.engine import single_process
from plaso.engine import utils as engine_utils


class TempDirectory(object):
  """A self cleaning temporary directory."""

  def __init__(self):
    """Initializes the temporary directory."""
    super(TempDirectory, self).__init__()
    self.name = u''

  def __enter__(self):
    """Make this work with the 'with' statement."""
    self.name = tempfile.mkdtemp()
    return self.name

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make this work with the 'with' statement."""
    shutil.rmtree(self.name, True)


class TestCollectorQueueConsumer(queue.ItemQueueConsumer):
  """Class that implements a test collector queue consumer."""

  def __init__(self, queue_object):
    """Initializes the queue consumer.

    Args:
      queue_object: the queue object (instance of Queue).
    """
    super(TestCollectorQueueConsumer, self).__init__(queue_object)
    self.path_specs = []

  def _ConsumeItem(self, path_spec):
    """Consumes an item callback for ConsumeItems.

    Args:
      path_spec: a path specification (instance of dfvfs.PathSpec).
    """
    self.path_specs.append(path_spec)

  @property
  def number_of_path_specs(self):
    """The number of path specifications."""
    return len(self.path_specs)

  def GetFilePaths(self):
    """Retrieves a list of file paths from the path specifications."""
    file_paths = []
    for path_spec in self.path_specs:
      location = getattr(path_spec, 'location', None)
      if location is not None:
        file_paths.append(location)
    return file_paths


class CollectorTestCase(unittest.TestCase):
  """The collector test case."""

  _TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data')

  # Show full diff results, part of TestCase so does not follow our naming
  # conventions.
  maxDiff = None

  def _GetTestFilePath(self, path_segments):
    """Retrieves the path of a test file relative to the test data directory.

    Args:
      path_segments: the path segments inside the test data directory.

    Returns:
      A path of the test file.
    """
    # Note that we need to pass the individual path segments to os.path.join
    # and not a list.
    return os.path.join(self._TEST_DATA_PATH, *path_segments)


class CollectorTest(CollectorTestCase):
  """Tests for the collector."""

  def testFileSystemCollection(self):
    """Test collection on the file system."""
    test_files = [
        self._GetTestFilePath([u'syslog.tgz']),
        self._GetTestFilePath([u'syslog.zip']),
        self._GetTestFilePath([u'syslog.bz2']),
        self._GetTestFilePath([u'wtmp.1'])]

    with TempDirectory() as dirname:
      for a_file in test_files:
        shutil.copy(a_file, dirname)

      path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)

      test_collection_queue = single_process.SingleProcessQueue()
      resolver_context = context.Context()
      test_collector = collector.Collector(
          test_collection_queue, dirname, path_spec,
          resolver_context=resolver_context)
      test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumeItems()

    self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)

  def testFileSystemWithFilterCollection(self):
    """Test collection on the file system with a filter."""
    dirname = u'.'
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)

    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      temp_file.write('/test_data/testdir/filter_.+.txt\n')
      temp_file.write('/test_data/.+evtx\n')
      temp_file.write('/AUTHORS\n')
      temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')

    test_collection_queue = single_process.SingleProcessQueue()
    resolver_context = context.Context()
    test_collector = collector.Collector(
        test_collection_queue, dirname, path_spec,
        resolver_context=resolver_context)

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
    test_collector.SetFilter(find_specs)

    test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumeItems()

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning((
          u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
              filter_name, exception))

    # Two files with test_data/testdir/filter_*.txt, AUTHORS
    # and test_data/System.evtx.
    self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)

    paths = test_collector_queue_consumer.GetFilePaths()

    current_directory = os.getcwd()

    expected_path = os.path.join(
        current_directory, u'test_data', u'testdir', u'filter_1.txt')
    self.assertTrue(expected_path in paths)

    expected_path = os.path.join(
        current_directory, u'test_data', u'testdir', u'filter_2.txt')
    self.assertFalse(expected_path in paths)

    expected_path = os.path.join(
        current_directory, u'test_data', u'testdir', u'filter_3.txt')
    self.assertTrue(expected_path in paths)

    expected_path = os.path.join(
        current_directory, u'AUTHORS')
    self.assertTrue(expected_path in paths)

  def testImageCollection(self):
    """Test collection on a storage media image file.

    This image has two files:
    + logs/hidden.zip
    + logs/sys.tgz

    The hidden.zip file contains one file, syslog, which is the
    same file as the one in sys.tgz.

    The end results should therefore be:
    + logs/hidden.zip (unchanged)
    + logs/hidden.zip:syslog (the text file extracted out)
    + logs/sys.tgz (unchanged)
    + logs/sys.tgz (read as a GZIP file, so not compressed)
    + logs/sys.tgz:syslog.gz (a GZIP file from the TAR container)
    + logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)

    This means that the collection script should collect 6 files in total.
    """
    test_file = self._GetTestFilePath([u'syslog_image.dd'])

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    test_collection_queue = single_process.SingleProcessQueue()
    resolver_context = context.Context()
    test_collector = collector.Collector(
        test_collection_queue, test_file, path_spec,
        resolver_context=resolver_context)
    test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumeItems()

    self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 3)

  def testImageWithFilterCollection(self):
    """Test collection on a storage media image file with a filter."""
    test_file = self._GetTestFilePath([u'ímynd.dd'])

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      temp_file.write('/a_directory/.+zip\n')
      temp_file.write('/a_directory/another.+\n')
      temp_file.write('/passwords.txt\n')

    test_collection_queue = single_process.SingleProcessQueue()
    resolver_context = context.Context()
    test_collector = collector.Collector(
        test_collection_queue, test_file, path_spec,
        resolver_context=resolver_context)

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
    test_collector.SetFilter(find_specs)

    test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumeItems()

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning((
          u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
              filter_name, exception))

    self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2)

    paths = test_collector_queue_consumer.GetFilePaths()

    # path_specs[0]
    # type: TSK
    # file_path: '/a_directory/another_file'
    # container_path: 'test_data/ímynd.dd'
    # image_offset: 0
    self.assertEquals(paths[0], u'/a_directory/another_file')

    # path_specs[1]
    # type: TSK
    # file_path: '/passwords.txt'
    # container_path: 'test_data/ímynd.dd'
    # image_offset: 0
    self.assertEquals(paths[1], u'/passwords.txt')


class BuildFindSpecsFromFileTest(unittest.TestCase):
  """Tests for the BuildFindSpecsFromFile function."""

  def testBuildFindSpecsFromFile(self):
    """Tests the BuildFindSpecsFromFile function."""
    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      # 2 hits.
      temp_file.write('/test_data/testdir/filter_.+.txt\n')
      # A single hit.
      temp_file.write('/test_data/.+evtx\n')
      # A single hit.
      temp_file.write('/AUTHORS\n')
      temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
      # This should not compile properly, missing file information.
      temp_file.write('failing/\n')
      # This should not fail during initial loading, but fail later on.
      temp_file.write('bad re (no close on that parenthesis/file\n')

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning(
          u'Unable to remove temporary file: {0:s} with error: {1:s}'.format(
              filter_name, exception))

    self.assertEquals(len(find_specs), 4)

    dirname = u'.'
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
    file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
    searcher = file_system_searcher.FileSystemSearcher(
        file_system, path_spec)

    path_spec_generator = searcher.Find(find_specs=find_specs)
    self.assertNotEquals(path_spec_generator, None)

    path_specs = list(path_spec_generator)
    # One evtx, one AUTHORS and two filter_*.txt files, in total 4 files.
    self.assertEquals(len(path_specs), 4)

    with self.assertRaises(IOError):
      _ = engine_utils.BuildFindSpecsFromFile('thisfiledoesnotexist')


if __name__ == '__main__':
  unittest.main()
@@ -0,0 +1,319 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The processing engine."""

import abc
import logging

from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.resolver import resolver as path_spec_resolver

from plaso.artifacts import knowledge_base
from plaso.engine import collector
from plaso.engine import queue
from plaso.lib import errors
from plaso.preprocessors import interface as preprocess_interface
from plaso.preprocessors import manager as preprocess_manager


class BaseEngine(object):
  """Class that defines the processing engine base."""

  def __init__(self, collection_queue, storage_queue, parse_error_queue):
    """Initialize the engine object.

    Args:
      collection_queue: the collection queue object (instance of Queue).
      storage_queue: the storage queue object (instance of Queue).
      parse_error_queue: the parser error queue object (instance of Queue).
    """
    self._collection_queue = collection_queue
    self._enable_debug_output = False
    self._enable_profiling = False
    self._event_queue_producer = queue.ItemQueueProducer(storage_queue)
    self._filter_object = None
    self._mount_path = None
    self._open_files = False
    self._parse_error_queue = parse_error_queue
    self._parse_error_queue_producer = queue.ItemQueueProducer(
        parse_error_queue)
    self._profiling_sample_rate = 1000
    self._source = None
    self._source_path_spec = None
    self._source_file_entry = None
    self._text_prepend = None

    self.knowledge_base = knowledge_base.KnowledgeBase()
    self.storage_queue = storage_queue

  def CreateCollector(
      self, include_directory_stat, vss_stores=None, filter_find_specs=None,
      resolver_context=None):
    """Creates a collector object.

    The collector discovers all the files that need to be processed by
    the workers. Once a file is discovered it is added to the process queue
    as a path specification (instance of dfvfs.PathSpec).

    Args:
      include_directory_stat: Boolean value to indicate whether directory
                              stat information should be collected.
      vss_stores: Optional list of VSS stores to include in the collection,
                  where 1 represents the first store. Set to None if no
                  VSS stores should be processed. The default is None.
      filter_find_specs: Optional list of filter find specifications
                         (instances of dfvfs.FindSpec). The default is None.
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None. Note that every thread or process
                        must have its own resolver context.

    Returns:
      A collector object (instance of Collector).

    Raises:
      RuntimeError: if source path specification is not set.
    """
    if not self._source_path_spec:
      raise RuntimeError(u'Missing source.')

    collector_object = collector.Collector(
        self._collection_queue, self._source, self._source_path_spec,
        resolver_context=resolver_context)

    collector_object.SetCollectDirectoryMetadata(include_directory_stat)

    if vss_stores:
      collector_object.SetVssInformation(vss_stores)

    if filter_find_specs:
      collector_object.SetFilter(filter_find_specs)

    return collector_object
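
  # A typical call, with hypothetical values: include directory stat
  # information, collect from the first two VSS stores and use no filter:
  #
  #   collector_object = engine_object.CreateCollector(
  #       True, vss_stores=[1, 2], resolver_context=resolver_context)
  #   collector_object.Collect()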

  @abc.abstractmethod
  def CreateExtractionWorker(self, worker_number):
    """Creates an extraction worker object.

    Args:
      worker_number: A number that identifies the worker.

    Returns:
      An extraction worker (instance of worker.ExtractionWorker).
    """

  def GetSourceFileSystemSearcher(self, resolver_context=None):
    """Retrieves the file system searcher of the source.

    Args:
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None. Note that every thread or process
                        must have its own resolver context.

    Returns:
      The file system searcher object (instance of dfvfs.FileSystemSearcher).

    Raises:
      RuntimeError: if source path specification is not set.
    """
    if not self._source_path_spec:
      raise RuntimeError(u'Missing source.')

    file_system = path_spec_resolver.Resolver.OpenFileSystem(
        self._source_path_spec, resolver_context=resolver_context)

    type_indicator = self._source_path_spec.type_indicator
    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS:
      mount_point = self._source_path_spec
    else:
      mount_point = self._source_path_spec.parent

    return file_system_searcher.FileSystemSearcher(file_system, mount_point)
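
  # For a storage media image the source path specification is typically a
  # TSK path specification whose parent is the OS path specification of the
  # image file; that parent then serves as the mount point. For a directory
  # source the OS path specification itself is the mount point.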

  def PreprocessSource(self, platform, resolver_context=None):
    """Preprocesses the source and fills the preprocessing object.

    Args:
      platform: string that indicates the platform (operating system).
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None. Note that every thread or process
                        must have its own resolver context.
    """
    searcher = self.GetSourceFileSystemSearcher(
        resolver_context=resolver_context)
    if not platform:
      platform = preprocess_interface.GuessOS(searcher)
    self.knowledge_base.platform = platform

    preprocess_manager.PreprocessPluginsManager.RunPlugins(
        platform, searcher, self.knowledge_base)

  def SetEnableDebugOutput(self, enable_debug_output):
    """Enables or disables debug output.

    Args:
      enable_debug_output: boolean value to indicate if the debug output
                           should be enabled.
    """
    self._enable_debug_output = enable_debug_output

  def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
    """Enables or disables profiling.

    Args:
      enable_profiling: boolean value to indicate if profiling should be
                        enabled.
      profiling_sample_rate: optional integer indicating the profiling sample
                             rate. The value contains the number of files
                             processed. The default value is 1000.
    """
    self._enable_profiling = enable_profiling
    self._profiling_sample_rate = profiling_sample_rate

  def SetFilterObject(self, filter_object):
    """Sets the filter object.

    Args:
      filter_object: the filter object (instance of objectfilter.Filter).
    """
    self._filter_object = filter_object

  def SetMountPath(self, mount_path):
    """Sets the mount path.

    Args:
      mount_path: string containing the mount path.
    """
    self._mount_path = mount_path

  # TODO: rename this mode.
  def SetOpenFiles(self, open_files):
    """Sets the open files mode.

    Args:
      open_files: boolean value to indicate if the worker should scan for
                  file entries inside files.
    """
    self._open_files = open_files

  def SetSource(self, source_path_spec, resolver_context=None):
    """Sets the source.

    Args:
      source_path_spec: The source path specification (instance of
                        dfvfs.PathSpec) as determined by the file system
                        scanner.
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None. Note that every thread or process
                        must have its own resolver context.

    Raises:
      BadConfigOption: if the source cannot be set.
      CollectorError: if the source path is not a device, file or directory.
    """
    path_spec = source_path_spec
    while path_spec.parent:
      path_spec = path_spec.parent

    # Note that source should be used for output purposes only.
    self._source = getattr(path_spec, 'location', u'')
    self._source_path_spec = source_path_spec

    self._source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        self._source_path_spec, resolver_context=resolver_context)

    if not self._source_file_entry:
      raise errors.BadConfigOption(
          u'No such device, file or directory: {0:s}.'.format(self._source))

    if (not self._source_file_entry.IsDirectory() and
        not self._source_file_entry.IsFile() and
        not self._source_file_entry.IsDevice()):
      raise errors.CollectorError(
          u'Source path: {0:s} not a device, file or directory.'.format(
              self._source))

    if self._source_path_spec.type_indicator in [
        dfvfs_definitions.TYPE_INDICATOR_OS,
        dfvfs_definitions.TYPE_INDICATOR_FAKE]:

      if self._source_file_entry.IsFile():
        logging.debug(u'Starting a collection on a single file.')
        # No need for multiple workers when parsing a single file.

      elif not self._source_file_entry.IsDirectory():
        raise errors.BadConfigOption(
            u'Source: {0:s} has to be a file or directory.'.format(
                self._source))

  # TODO: remove this functionality.
  def SetTextPrepend(self, text_prepend):
    """Sets the text prepend.

    Args:
      text_prepend: string that contains the text to prepend to every
                    event object.
    """
    self._text_prepend = text_prepend

  def SignalAbort(self):
    """Signals the engine to abort."""
    logging.warning(u'Signalled abort.')
    self._event_queue_producer.SignalEndOfInput()
    self._parse_error_queue_producer.SignalEndOfInput()

  def SignalEndOfInputStorageQueue(self):
    """Signals the storage queue no input remains."""
    self._event_queue_producer.SignalEndOfInput()
    self._parse_error_queue_producer.SignalEndOfInput()

  def SourceIsDirectory(self):
    """Determines if the source is a directory.

    Raises:
      RuntimeError: if source path specification is not set.
    """
    if not self._source_file_entry:
      raise RuntimeError(u'Missing source.')

    return (not self.SourceIsStorageMediaImage() and
            self._source_file_entry.IsDirectory())

  def SourceIsFile(self):
    """Determines if the source is a file.

    Raises:
      RuntimeError: if source path specification is not set.
    """
    if not self._source_file_entry:
      raise RuntimeError(u'Missing source.')

    return (not self.SourceIsStorageMediaImage() and
            self._source_file_entry.IsFile())

  def SourceIsStorageMediaImage(self):
    """Determines if the source is a storage media image file or device.

    Raises:
      RuntimeError: if source path specification is not set.
    """
    if not self._source_path_spec:
      raise RuntimeError(u'Missing source.')

    return self._source_path_spec.type_indicator not in [
        dfvfs_definitions.TYPE_INDICATOR_OS,
        dfvfs_definitions.TYPE_INDICATOR_FAKE]
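

# A short usage sketch, assuming dfvfs is installed and that u'/tmp' exists.
# An OS path specification is reported as a directory source, while a TSK
# path specification with an OS parent would be reported as a storage media
# image:
#
#   from dfvfs.path import factory as path_spec_factory
#   from plaso.engine import single_process
#
#   engine_object = single_process.SingleProcessEngine()
#   path_spec = path_spec_factory.Factory.NewPathSpec(
#       dfvfs_definitions.TYPE_INDICATOR_OS, location=u'/tmp')
#   engine_object.SetSource(path_spec)
#   engine_object.SourceIsDirectory()          # True
#   engine_object.SourceIsStorageMediaImage()  # False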
@@ -0,0 +1,204 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Queue management implementation for Plaso.

This file contains the queue interface used by plaso for queue management,
together with the corresponding producer and consumer interfaces.

The queue has been abstracted in order to provide support for different
implementations of the queueing mechanism, to support multi processing and
scalability.
"""

import abc

from plaso.lib import errors


class QueueEndOfInput(object):
  """Class that implements a queue end of input marker."""


class Queue(object):
  """Class that implements the queue interface."""

  @abc.abstractmethod
  def __len__(self):
    """Returns the estimated current number of items in the queue."""

  @abc.abstractmethod
  def IsEmpty(self):
    """Determines if the queue is empty."""

  @abc.abstractmethod
  def PushItem(self, item):
    """Pushes an item onto the queue."""

  @abc.abstractmethod
  def PopItem(self):
    """Pops an item off the queue."""

  def SignalEndOfInput(self):
    """Signals the queue no input remains."""
    self.PushItem(QueueEndOfInput())
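    # The QueueEndOfInput instance travels through the queue in-band;
    # ConsumeItems() and ConsumeEventObjects() push it back onto the queue
    # when they pop it, so that every consumer attached to the queue sees
    # it and stops.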


class QueueConsumer(object):
  """Class that implements the queue consumer interface.

  The consumer subscribes to updates on the queue.
  """

  def __init__(self, queue_object):
    """Initializes the queue consumer.

    Args:
      queue_object: the queue object (instance of Queue).
    """
    super(QueueConsumer, self).__init__()
    self._abort = False
    self._queue = queue_object

  def SignalAbort(self):
    """Signals the consumer to abort."""
    self._abort = True


class QueueProducer(object):
  """Class that implements the queue producer interface.

  The producer generates updates on the queue.
  """

  def __init__(self, queue_object):
    """Initializes the queue producer.

    Args:
      queue_object: the queue object (instance of Queue).
    """
    super(QueueProducer, self).__init__()
    self._abort = False
    self._queue = queue_object

  def SignalAbort(self):
    """Signals the producer to abort."""
    self._abort = True

  def SignalEndOfInput(self):
    """Signals the queue no input remains."""
    self._queue.SignalEndOfInput()


class EventObjectQueueConsumer(QueueConsumer):
  """Class that implements the event object queue consumer.

  The consumer subscribes to updates on the queue.
  """

  @abc.abstractmethod
  def _ConsumeEventObject(self, event_object, **kwargs):
    """Consumes an event object callback for ConsumeEventObjects."""

  def ConsumeEventObjects(self, **kwargs):
    """Consumes the event objects that are pushed onto the queue.

    This function will issue a callback to _ConsumeEventObject for every
    event object (instance of EventObject) consumed from the queue.

    Args:
      kwargs: keyword arguments to pass to the _ConsumeEventObject callback.
    """
    while not self._abort:
      try:
        item = self._queue.PopItem()
      except errors.QueueEmpty:
        break

      if isinstance(item, QueueEndOfInput):
        # Push the item back onto the queue to make sure all
        # queue consumers are stopped.
        self._queue.PushItem(item)
        break

      self._ConsumeEventObject(item, **kwargs)

    self._abort = False


class ItemQueueConsumer(QueueConsumer):
  """Class that implements an item queue consumer.

  The consumer subscribes to updates on the queue.
  """

  @abc.abstractmethod
  def _ConsumeItem(self, item):
    """Consumes an item callback for ConsumeItems.

    Args:
      item: the item object.
    """

  def ConsumeItems(self):
    """Consumes the items that are pushed onto the queue."""
    while not self._abort:
      try:
        item = self._queue.PopItem()
      except errors.QueueEmpty:
        break

      if isinstance(item, QueueEndOfInput):
        # Push the item back onto the queue to make sure all
        # queue consumers are stopped.
        self._queue.PushItem(item)
        break

      self._ConsumeItem(item)

    self._abort = False


class ItemQueueProducer(QueueProducer):
  """Class that implements an item queue producer.

  The producer generates updates on the queue.
  """

  def _FlushQueue(self):
    """Flushes the queue callback for the QueueFull exception."""
    return

  def ProduceItem(self, item):
    """Produces an item onto the queue.

    Args:
      item: the item object.
    """
    try:
      self._queue.PushItem(item)
    except errors.QueueFull:
      self._FlushQueue()

  def ProduceItems(self, items):
    """Produces items onto the queue.

    Args:
      items: a list or generator of item objects.
    """
    for item in items:
      self.ProduceItem(item)
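

# A minimal sketch of the producer/consumer protocol defined above, assuming
# the SingleProcessQueue implementation from plaso.engine.single_process:
#
#   from plaso.engine import queue
#   from plaso.engine import single_process
#
#   class ListConsumer(queue.ItemQueueConsumer):
#     """Consumer that collects items into a list."""
#
#     def __init__(self, queue_object):
#       super(ListConsumer, self).__init__(queue_object)
#       self.items = []
#
#     def _ConsumeItem(self, item):
#       self.items.append(item)
#
#   queue_object = single_process.SingleProcessQueue()
#   producer = queue.ItemQueueProducer(queue_object)
#   producer.ProduceItems([u'a', u'b'])
#   producer.SignalEndOfInput()
#
#   consumer = ListConsumer(queue_object)
#   consumer.ConsumeItems()
#   # consumer.items is now [u'a', u'b']; the QueueEndOfInput marker was
#   # pushed back onto the queue for any other consumers.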
@@ -0,0 +1,366 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The single process processing engine."""

import collections
import logging
import pdb

from plaso.engine import collector
from plaso.engine import engine
from plaso.engine import queue
from plaso.engine import worker
from plaso.lib import errors
from plaso.parsers import context as parsers_context


class SingleProcessCollector(collector.Collector):
  """Class that implements a single process collector object."""

  def __init__(
      self, process_queue, source_path, source_path_spec,
      resolver_context=None):
    """Initializes the collector object.

    The collector discovers all the files that need to be processed by
    the workers. Once a file is discovered it is added to the process queue
    as a path specification (instance of dfvfs.PathSpec).

    Args:
      process_queue: The process queue (instance of Queue). This queue
                     contains the file entries that need to be processed.
      source_path: Path of the source file or directory.
      source_path_spec: The source path specification (instance of
                        dfvfs.PathSpec) as determined by the file system
                        scanner.
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None.
    """
    super(SingleProcessCollector, self).__init__(
        process_queue, source_path, source_path_spec,
        resolver_context=resolver_context)

    self._extraction_worker = None
    self._fs_collector = SingleProcessFileSystemCollector(process_queue)

  def _FlushQueue(self):
    """Flushes the queue callback for the QueueFull exception."""
    while not self._queue.IsEmpty():
      logging.debug(u'Extraction worker started.')
      self._extraction_worker.Run()
      logging.debug(u'Extraction worker stopped.')
|
||||
|
||||
def SetExtractionWorker(self, extraction_worker):
|
||||
"""Sets the extraction worker.
|
||||
|
||||
Args:
|
||||
extraction_worker: the extraction worker object (instance of
|
||||
EventExtractionWorker).
|
||||
"""
|
||||
self._extraction_worker = extraction_worker
|
||||
|
||||
self._fs_collector.SetExtractionWorker(extraction_worker)
|
||||
|
||||
|
||||
class SingleProcessEngine(engine.BaseEngine):
  """Class that defines the single process engine."""

  def __init__(self, maximum_number_of_queued_items=0):
    """Initializes the single process engine object.

    Args:
      maximum_number_of_queued_items: The maximum number of queued items.
                                      The default is 0, which represents
                                      no limit.
    """
    collection_queue = SingleProcessQueue(
        maximum_number_of_queued_items=maximum_number_of_queued_items)
    storage_queue = SingleProcessQueue(
        maximum_number_of_queued_items=maximum_number_of_queued_items)
    parse_error_queue = SingleProcessQueue(
        maximum_number_of_queued_items=maximum_number_of_queued_items)

    super(SingleProcessEngine, self).__init__(
        collection_queue, storage_queue, parse_error_queue)

    self._event_queue_producer = SingleProcessItemQueueProducer(storage_queue)
    self._parse_error_queue_producer = SingleProcessItemQueueProducer(
        parse_error_queue)

  def CreateCollector(
      self, include_directory_stat, vss_stores=None, filter_find_specs=None,
      resolver_context=None):
    """Creates a collector object.

    The collector discovers all the files that need to be processed by
    the workers. Once a file is discovered it is added to the process queue
    as a path specification (instance of dfvfs.PathSpec).

    Args:
      include_directory_stat: Boolean value to indicate whether directory
                              stat information should be collected.
      vss_stores: Optional list of VSS stores to include in the collection,
                  where 1 represents the first store. Set to None if no
                  VSS stores should be processed. The default is None.
      filter_find_specs: Optional list of filter find specifications (instances
                         of dfvfs.FindSpec). The default is None.
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None. Note that every thread or process
                        must have its own resolver context.

    Returns:
      A collector object (instance of Collector).

    Raises:
      RuntimeError: if the source path specification is not set.
    """
    if not self._source_path_spec:
      raise RuntimeError(u'Missing source.')

    collector_object = SingleProcessCollector(
        self._collection_queue, self._source, self._source_path_spec,
        resolver_context=resolver_context)

    collector_object.SetCollectDirectoryMetadata(include_directory_stat)

    if vss_stores:
      collector_object.SetVssInformation(vss_stores)

    if filter_find_specs:
      collector_object.SetFilter(filter_find_specs)

    return collector_object

  def CreateExtractionWorker(self, worker_number):
    """Creates an extraction worker object.

    Args:
      worker_number: A number that identifies the worker.

    Returns:
      An extraction worker (instance of worker.BaseEventExtractionWorker).
    """
    parser_context = parsers_context.ParserContext(
        self._event_queue_producer, self._parse_error_queue_producer,
        self.knowledge_base)

    extraction_worker = SingleProcessEventExtractionWorker(
        worker_number, self._collection_queue, self._event_queue_producer,
        self._parse_error_queue_producer, parser_context)

    extraction_worker.SetEnableDebugOutput(self._enable_debug_output)

    # TODO: move the profiler into a separate object.
    extraction_worker.SetEnableProfiling(
        self._enable_profiling,
        profiling_sample_rate=self._profiling_sample_rate)

    if self._open_files:
      extraction_worker.SetOpenFiles(self._open_files)

    if self._filter_object:
      extraction_worker.SetFilterObject(self._filter_object)

    if self._mount_path:
      extraction_worker.SetMountPath(self._mount_path)

    if self._text_prepend:
      extraction_worker.SetTextPrepend(self._text_prepend)

    return extraction_worker

  def ProcessSource(
      self, collector_object, storage_writer, parser_filter_string=None):
    """Processes the source and extracts event objects.

    Args:
      collector_object: A collector object (instance of Collector).
      storage_writer: A storage writer object (instance of BaseStorageWriter).
      parser_filter_string: Optional parser filter string. The default is None.
    """
    extraction_worker = self.CreateExtractionWorker(0)

    extraction_worker.InitializeParserObjects(
        parser_filter_string=parser_filter_string)

    # Set the extraction worker and storage writer values so that they
    # can be accessed if the QueueFull exception is raised. This is
    # needed in single process mode to prevent the queue consuming too
    # much memory.
    collector_object.SetExtractionWorker(extraction_worker)
    self._event_queue_producer.SetStorageWriter(storage_writer)
    self._parse_error_queue_producer.SetStorageWriter(storage_writer)

    logging.debug(u'Processing started.')

    logging.debug(u'Collection started.')
    collector_object.Collect()
    logging.debug(u'Collection stopped.')

    logging.debug(u'Extraction worker started.')
    extraction_worker.Run()
    logging.debug(u'Extraction worker stopped.')

    self._event_queue_producer.SignalEndOfInput()

    logging.debug(u'Storage writer started.')
    storage_writer.WriteEventObjects()
    logging.debug(u'Storage writer stopped.')

    # Reset the extraction worker and storage writer values to return
    # the objects in their original state. This will prevent access
    # to the extraction worker outside this function and allow it
    # to be garbage collected.
    self._event_queue_producer.SetStorageWriter(None)
    self._parse_error_queue_producer.SetStorageWriter(None)
    collector_object.SetExtractionWorker(None)

    logging.debug(u'Processing completed.')


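A sketch of how ProcessSource is intended to be driven end to end, under the assumption that a dfvfs-resolvable source is available; the path specification setup mirrors the unit test further below, and CreateStorageWriter is a hypothetical stand-in since no concrete BaseStorageWriter implementation is part of this change:

from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context

from plaso.engine import single_process

resolver_context = context.Context()
engine_object = single_process.SingleProcessEngine(
    maximum_number_of_queued_items=25000)

os_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location=u'image.dd')
source_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
    parent=os_path_spec)

engine_object.SetSource(source_path_spec, resolver_context=resolver_context)

collector_object = engine_object.CreateCollector(
    False, resolver_context=resolver_context)

# A concrete storage writer is assumed here; the factory function name
# is hypothetical.
storage_writer = CreateStorageWriter()

engine_object.ProcessSource(collector_object, storage_writer)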
class SingleProcessEventExtractionWorker(worker.BaseEventExtractionWorker):
  """Class that defines the single process event extraction worker."""

  def _DebugParseFileEntry(self):
    """Callback for debugging file entry parsing failures."""
    pdb.post_mortem()


class SingleProcessFileSystemCollector(collector.FileSystemCollector):
  """Class that implements a single process file system collector object."""

  def __init__(self, process_queue):
    """Initializes the collector object.

    The collector discovers all the files that need to be processed by
    the workers. Once a file is discovered it is added to the process queue
    as a path specification (instance of dfvfs.PathSpec).

    Args:
      process_queue: The process queue (instance of Queue). This queue contains
                     the file entries that need to be processed.
    """
    super(SingleProcessFileSystemCollector, self).__init__(process_queue)

    self._extraction_worker = None

  def _FlushQueue(self):
    """Flushes the queue callback for the QueueFull exception."""
    while not self._queue.IsEmpty():
      logging.debug(u'Extraction worker started.')
      self._extraction_worker.Run()
      logging.debug(u'Extraction worker stopped.')

  def SetExtractionWorker(self, extraction_worker):
    """Sets the extraction worker.

    Args:
      extraction_worker: the extraction worker object (instance of
                         EventExtractionWorker).
    """
    self._extraction_worker = extraction_worker


class SingleProcessItemQueueProducer(queue.ItemQueueProducer):
  """Class that implements a single process item queue producer."""

  def __init__(self, queue_object):
    """Initializes the queue producer.

    Args:
      queue_object: the queue object (instance of Queue).
    """
    super(SingleProcessItemQueueProducer, self).__init__(queue_object)

    self._storage_writer = None

  def _FlushQueue(self):
    """Flushes the queue callback for the QueueFull exception."""
    logging.debug(u'Storage writer started.')
    self._storage_writer.WriteEventObjects()
    logging.debug(u'Storage writer stopped.')

  def SetStorageWriter(self, storage_writer):
    """Sets the storage writer.

    Args:
      storage_writer: the storage writer object (instance of
                      BaseStorageWriter).
    """
    self._storage_writer = storage_writer


class SingleProcessQueue(queue.Queue):
  """Single process queue."""

  def __init__(self, maximum_number_of_queued_items=0):
    """Initializes a single process queue object.

    Args:
      maximum_number_of_queued_items: The maximum number of queued items.
                                      The default is 0, which represents
                                      no limit.
    """
    super(SingleProcessQueue, self).__init__()

    # The Queue interface defines the maximum number of queued items to be
    # 0 if unlimited as does the multi processing queue, but deque uses
    # None to indicate no limit.
    if maximum_number_of_queued_items == 0:
      maximum_number_of_queued_items = None

    # maxlen contains the maximum number of items allowed to be queued,
    # where None represents unlimited.
    self._queue = collections.deque(
        maxlen=maximum_number_of_queued_items)

  def __len__(self):
    """Returns the estimated current number of items in the queue."""
    return len(self._queue)

  def IsEmpty(self):
    """Determines if the queue is empty."""
    return len(self._queue) == 0

  def PushItem(self, item):
    """Pushes an item onto the queue.

    Raises:
      QueueFull: when the queue is full.
    """
    number_of_items = len(self._queue)

    # Deque will drop the first item in the queue when maxlen is exceeded.
    if not self._queue.maxlen or number_of_items < self._queue.maxlen:
      self._queue.append(item)
      number_of_items += 1

    if self._queue.maxlen and number_of_items == self._queue.maxlen:
      raise errors.QueueFull

  def PopItem(self):
    """Pops an item off the queue.

    Raises:
      QueueEmpty: when the queue is empty.
    """
    try:
      # Using popleft to have FIFO behavior.
      return self._queue.popleft()
    except IndexError:
      raise errors.QueueEmpty
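A short illustration of the queue-full semantics implemented above: the item that fills the queue to its maximum is still appended before QueueFull is raised, which the unit test below relies on. All names used here are defined in this change:

from plaso.engine import single_process
from plaso.lib import errors

queue_object = single_process.SingleProcessQueue(
    maximum_number_of_queued_items=2)
queue_object.PushItem(u'first')
try:
  queue_object.PushItem(u'second')
except errors.QueueFull:
  pass

# Both items were queued even though the second push raised QueueFull.
assert len(queue_object) == 2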
@@ -0,0 +1,133 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests the single process processing engine."""

import os
import unittest

from dfvfs.helpers import file_system_searcher
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context

from plaso.engine import single_process
from plaso.engine import test_lib
from plaso.lib import errors


class SingleProcessQueueTest(unittest.TestCase):
  """Tests the single process queue."""

  _ITEMS = frozenset(['item1', 'item2', 'item3', 'item4'])

  def testPushPopItem(self):
    """Tests the PushItem and PopItem functions."""
    test_queue = single_process.SingleProcessQueue()

    for item in self._ITEMS:
      test_queue.PushItem(item)

    self.assertEquals(len(test_queue), len(self._ITEMS))

    test_queue.SignalEndOfInput()
    test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
    test_queue_consumer.ConsumeItems()

    expected_number_of_items = len(self._ITEMS)
    self.assertEquals(
        test_queue_consumer.number_of_items, expected_number_of_items)

  def testQueueEmpty(self):
    """Tests that the queue raises the QueueEmpty exception."""
    test_queue = single_process.SingleProcessQueue()

    with self.assertRaises(errors.QueueEmpty):
      test_queue.PopItem()

  def testQueueFull(self):
    """Tests that the queue raises the QueueFull exception."""
    test_queue = single_process.SingleProcessQueue(
        maximum_number_of_queued_items=5)

    for item in self._ITEMS:
      test_queue.PushItem(item)

    with self.assertRaises(errors.QueueFull):
      test_queue.PushItem('item5')

    with self.assertRaises(errors.QueueFull):
      test_queue.PushItem('item6')

    test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
    test_queue_consumer.ConsumeItems()

    # The item that fills the queue to its maximum is still appended before
    # QueueFull is raised, hence one item more than _ITEMS is consumed.
    expected_number_of_items = len(self._ITEMS)
    self.assertEquals(
        test_queue_consumer.number_of_items, expected_number_of_items + 1)


class SingleProcessEngineTest(unittest.TestCase):
  """Tests for the single process engine object."""

  _TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data')

  def testEngine(self):
    """Tests the engine functionality."""
    resolver_context = context.Context()
    test_engine = single_process.SingleProcessEngine(
        maximum_number_of_queued_items=25000)

    self.assertNotEquals(test_engine, None)

    source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd')
    os_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
    source_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=os_path_spec)

    test_engine.SetSource(source_path_spec, resolver_context=resolver_context)

    self.assertFalse(test_engine.SourceIsDirectory())
    self.assertFalse(test_engine.SourceIsFile())
    self.assertTrue(test_engine.SourceIsStorageMediaImage())

    test_searcher = test_engine.GetSourceFileSystemSearcher(
        resolver_context=resolver_context)
    self.assertNotEquals(test_searcher, None)
    self.assertIsInstance(
        test_searcher, file_system_searcher.FileSystemSearcher)

    test_engine.PreprocessSource('Windows')

    test_collector = test_engine.CreateCollector(
        False, vss_stores=None, filter_find_specs=None,
        resolver_context=resolver_context)
    self.assertNotEquals(test_collector, None)
    self.assertIsInstance(
        test_collector, single_process.SingleProcessCollector)

    test_extraction_worker = test_engine.CreateExtractionWorker(0)
    self.assertNotEquals(test_extraction_worker, None)
    self.assertIsInstance(
        test_extraction_worker,
        single_process.SingleProcessEventExtractionWorker)


if __name__ == '__main__':
  unittest.main()
@@ -0,0 +1,71 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine related functions and classes for testing."""

import os
import unittest

from plaso.engine import queue


class TestQueueConsumer(queue.ItemQueueConsumer):
  """Class that implements the test queue consumer.

  The queue consumer subscribes to updates on the queue.
  """

  def __init__(self, test_queue):
    """Initializes the queue consumer.

    Args:
      test_queue: the test queue (instance of Queue).
    """
    super(TestQueueConsumer, self).__init__(test_queue)
    self.items = []

  def _ConsumeItem(self, item):
    """Consumes an item callback for ConsumeItems."""
    self.items.append(item)

  @property
  def number_of_items(self):
    """The number of items."""
    return len(self.items)


class EngineTestCase(unittest.TestCase):
  """The unit test case for the engine."""

  _TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data')

  # Show full diff results, part of TestCase so does not follow our naming
  # conventions.
  maxDiff = None

  def _GetTestFilePath(self, path_segments):
    """Retrieves the path of a test file relative to the test data directory.

    Args:
      path_segments: the path segments inside the test data directory.

    Returns:
      A path of the test file.
    """
    # Note that we need to pass the individual path segments to os.path.join
    # and not a list.
    return os.path.join(self._TEST_DATA_PATH, *path_segments)
@@ -0,0 +1,75 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Engine utility functions."""

import logging

from dfvfs.helpers import file_system_searcher

from plaso.winreg import path_expander


def BuildFindSpecsFromFile(filter_file_path, pre_obj=None):
  """Returns a list of find specifications from a filter file.

  Args:
    filter_file_path: A path to a file that contains find specifications.
    pre_obj: A preprocessing object (instance of PreprocessObject). This is
             optional but when provided takes care of expanding each segment.

  Returns:
    A list of find specifications (instances of dfvfs.FindSpec).
  """
  find_specs = []

  if pre_obj:
    expander = path_expander.WinRegistryKeyPathExpander()

  with open(filter_file_path, 'rb') as file_object:
    for line in file_object:
      line = line.strip()
      if line.startswith(u'#'):
        continue

      if pre_obj:
        try:
          line = expander.ExpandPath(line, pre_obj=pre_obj)
        except KeyError as exception:
          logging.error((
              u'Unable to use collection filter line: {0:s} with error: '
              u'{1:s}').format(line, exception))
          continue

      if not line.startswith(u'/'):
        logging.warning((
            u'The filter string must be defined as an absolute path: '
            u'{0:s}').format(line))
        continue

      _, _, file_path = line.rstrip().rpartition(u'/')
      if not file_path:
        logging.warning(
            u'Unable to parse the filter string: {0:s}'.format(line))
        continue

      # Convert the filter paths into a list of path segments and strip
      # the root path segment.
      path_segments = line.split(u'/')
      path_segments.pop(0)

      find_specs.append(file_system_searcher.FindSpec(
          location_regex=path_segments, case_sensitive=False))

  return find_specs
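A short usage sketch, assuming this module lives at plaso.engine.utils and that a filter file (the file name and path below are illustrative) contains one absolute path per line, with comment lines starting with '#'; since the path segments become location_regex values, regular expressions are allowed within segments:

from plaso.engine import utils

# filter_file.txt could contain, for example:
#   # User web browsing history.
#   /Users/.+/AppData/Local/Microsoft/Windows/WebCache/.+
find_specs = utils.BuildFindSpecsFromFile(u'filter_file.txt')
print u'Number of find specs: {0:d}'.format(len(find_specs))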
@@ -0,0 +1,352 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The event extraction worker."""

import logging
import os

from dfvfs.resolver import context
from dfvfs.resolver import resolver as path_spec_resolver

try:
  from guppy import hpy
except ImportError:
  hpy = None

from plaso.engine import classifier
from plaso.engine import queue
from plaso.lib import errors
from plaso.parsers import manager as parsers_manager


class BaseEventExtractionWorker(queue.ItemQueueConsumer):
  """Class that defines the event extraction worker base.

  This class is designed to watch a queue for path specifications of files
  and directories (file entries) for which events need to be extracted.

  The event extraction worker needs to determine if a parser suitable
  for parsing a particular file is available. All extracted event objects
  are pushed on a storage queue for further processing.
  """

  def __init__(
      self, identifier, process_queue, event_queue_producer,
      parse_error_queue_producer, parser_context):
    """Initializes the event extraction worker object.

    Args:
      identifier: The identifier, usually an incrementing integer.
      process_queue: The process queue (instance of Queue). This queue contains
                     the file entries that need to be processed.
      event_queue_producer: The event object queue producer (instance of
                            ItemQueueProducer).
      parse_error_queue_producer: The parse error queue producer (instance of
                                  ItemQueueProducer).
      parser_context: A parser context object (instance of ParserContext).
    """
    super(BaseEventExtractionWorker, self).__init__(process_queue)
    self._enable_debug_output = False
    self._identifier = identifier
    self._open_files = False
    self._parser_context = parser_context
    self._filestat_parser_object = None
    self._parser_objects = None

    # We need a resolver context per process to prevent multi processing
    # issues with file objects stored in images.
    self._resolver_context = context.Context()
    self._event_queue_producer = event_queue_producer
    self._parse_error_queue_producer = parse_error_queue_producer

    # Attributes that contain the current status of the worker.
    self._current_working_file = u''
    self._is_running = False

    # Attributes for profiling.
    self._enable_profiling = False
    self._heapy = None
    self._profiling_sample = 0
    self._profiling_sample_rate = 1000
    self._profiling_sample_file = u'{0!s}.hpy'.format(self._identifier)

  def _ConsumeItem(self, path_spec):
    """Consumes an item callback for ConsumeItems.

    Args:
      path_spec: a path specification (instance of dfvfs.PathSpec).
    """
    file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        path_spec, resolver_context=self._resolver_context)

    if file_entry is None:
      logging.warning(u'Unable to open file entry: {0:s}'.format(
          path_spec.comparable))
      return

    try:
      self.ParseFileEntry(file_entry)
    except IOError as exception:
      logging.warning(u'Unable to parse file: {0:s} with error: {1:s}'.format(
          path_spec.comparable, exception))

  def _DebugParseFileEntry(self):
    """Callback for debugging file entry parsing failures."""
    return

  def _ParseFileEntryWithParser(self, parser_object, file_entry):
    """Parses a file entry with a specific parser.

    Args:
      parser_object: A parser object (instance of BaseParser).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      QueueFull: If a queue is full.
    """
    try:
      parser_object.Parse(self._parser_context, file_entry)

    except errors.UnableToParseFile as exception:
      logging.debug(u'Not a {0:s} file ({1:s}) - {2:s}'.format(
          parser_object.NAME, file_entry.name, exception))

    except errors.QueueFull:
      raise

    except IOError as exception:
      logging.debug(
          u'[{0:s}] Unable to parse: {1:s} with error: {2:s}'.format(
              parser_object.NAME, file_entry.path_spec.comparable,
              exception))

    # Casting a wide net, catching all exceptions. Done to keep the worker
    # running, despite the parser hitting errors, so the worker doesn't die
    # if a single file is corrupted or there is a bug in a parser.
    except Exception as exception:
      logging.warning(
          u'[{0:s}] Unable to process file: {1:s} with error: {2:s}.'.format(
              parser_object.NAME, file_entry.path_spec.comparable,
              exception))
      logging.debug(
          u'The path specification that caused the error: {0:s}'.format(
              file_entry.path_spec.comparable))
      logging.exception(exception)

      if self._enable_debug_output:
        self._DebugParseFileEntry()

  def _ProfilingStart(self):
    """Starts the profiling."""
    self._heapy.setrelheap()
    self._profiling_sample = 0

    try:
      os.remove(self._profiling_sample_file)
    except OSError:
      pass

  def _ProfilingStop(self):
    """Stops the profiling."""
    self._ProfilingWriteSample()

  def _ProfilingUpdate(self):
    """Updates the profiling."""
    self._profiling_sample += 1

    if self._profiling_sample >= self._profiling_sample_rate:
      self._ProfilingWriteSample()
      self._profiling_sample = 0

  def _ProfilingWriteSample(self):
    """Writes a profiling sample to the sample file."""
    heap = self._heapy.heap()
    heap.dump(self._profiling_sample_file)

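A minimal sketch of how a written sample file could be inspected afterwards, assuming guppy is installed; the file name mirrors the _profiling_sample_file naming above for a worker with identifier 0, and loading via hpy().load is an assumption about the guppy API rather than something shown in this change:

from guppy import hpy

heapy = hpy()
# Loads the heap sample dumped by _ProfilingWriteSample().
sample = heapy.load(u'0.hpy')
print sample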
  def GetStatus(self):
    """Returns a status dictionary."""
    return {
        'is_running': self._is_running,
        'identifier': u'Worker_{0:d}'.format(self._identifier),
        'current_file': self._current_working_file,
        'counter': self._parser_context.number_of_events}

  def InitializeParserObjects(self, parser_filter_string=None):
    """Initializes the parser objects.

    The parser_filter_string is a simple comma separated value string that
    denotes a list of parser names to include and/or exclude. Each entry
    can have the value of:
      + Exact match of a list of parsers, or a preset (see
        plaso/frontend/presets.py for a full list of available presets).
      + A name of a single parser (case insensitive), e.g. msiecfparser.
      + A glob name for a single parser, e.g. '*msie*' (case insensitive).

    Args:
      parser_filter_string: Optional parser filter string. The default is None.
    """
    self._parser_objects = parsers_manager.ParsersManager.GetParserObjects(
        parser_filter_string=parser_filter_string)

    for parser_object in self._parser_objects:
      if parser_object.NAME == 'filestat':
        self._filestat_parser_object = parser_object
        break

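For example, assuming an extraction worker constructed as in the single process engine above, the active parsers could be limited to a comma separated list of names; 'filestat' is referenced in the code above, while the second name is taken from the docstring's example:

extraction_worker.InitializeParserObjects(
    parser_filter_string=u'filestat,msiecfparser')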
  def ParseFileEntry(self, file_entry):
    """Parses a file entry.

    Args:
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
    logging.debug(u'[ParseFileEntry] Parsing: {0:s}'.format(
        file_entry.path_spec.comparable))

    self._current_working_file = getattr(
        file_entry.path_spec, u'location', file_entry.name)

    if file_entry.IsDirectory() and self._filestat_parser_object:
      self._ParseFileEntryWithParser(self._filestat_parser_object, file_entry)

    elif file_entry.IsFile():
      # TODO: Do not go through all parsers, just the ones
      # that the classifier classifies the file as.

      for parser_object in self._parser_objects:
        logging.debug(u'Trying to parse: {0:s} with parser: {1:s}'.format(
            file_entry.name, parser_object.NAME))

        self._ParseFileEntryWithParser(parser_object, file_entry)

    logging.debug(u'[ParseFileEntry] Done parsing: {0:s}'.format(
        file_entry.path_spec.comparable))

    if self._enable_profiling:
      self._ProfilingUpdate()

    if self._open_files:
      try:
        for sub_file_entry in classifier.Classifier.SmartOpenFiles(file_entry):
          if self._abort:
            break

          self.ParseFileEntry(sub_file_entry)

      except IOError as exception:
        logging.warning(
            u'Unable to parse file: {0:s} with error: {1:s}'.format(
                file_entry.path_spec.comparable, exception))

  def Run(self):
    """Extracts event objects from file entries."""
    self._parser_context.ResetCounters()

    if self._enable_profiling:
      self._ProfilingStart()

    self._is_running = True

    logging.info(
        u'Worker {0:d} (PID: {1:d}) started monitoring process queue.'.format(
            self._identifier, os.getpid()))

    self.ConsumeItems()

    logging.info(
        u'Worker {0:d} (PID: {1:d}) stopped monitoring process queue.'.format(
            self._identifier, os.getpid()))

    self._current_working_file = u''

    self._is_running = False

    if self._enable_profiling:
      self._ProfilingStop()

    self._resolver_context.Empty()

  def SetEnableDebugOutput(self, enable_debug_output):
    """Enables or disables debug output.

    Args:
      enable_debug_output: boolean value to indicate if the debug output
                           should be enabled.
    """
    self._enable_debug_output = enable_debug_output

  def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
    """Enables or disables profiling.

    Args:
      enable_profiling: boolean value to indicate if the profiling
                        should be enabled.
      profiling_sample_rate: optional integer indicating the profiling sample
                             rate. The value contains the number of files
                             processed. The default value is 1000.
    """
    if hpy:
      self._enable_profiling = enable_profiling
      self._profiling_sample_rate = profiling_sample_rate

      if self._enable_profiling and not self._heapy:
        self._heapy = hpy()

  def SetFilterObject(self, filter_object):
    """Sets the filter object.

    Args:
      filter_object: the filter object (instance of objectfilter.Filter).
    """
    self._parser_context.SetFilterObject(filter_object)

  def SetMountPath(self, mount_path):
    """Sets the mount path.

    Args:
      mount_path: string containing the mount path.
    """
    self._parser_context.SetMountPath(mount_path)

  # TODO: rename this mode.
  def SetOpenFiles(self, open_files):
    """Sets the open files mode.

    Args:
      open_files: boolean value to indicate if the worker should scan for
                  file entries inside files.
    """
    self._open_files = open_files

  def SetTextPrepend(self, text_prepend):
    """Sets the text prepend.

    Args:
      text_prepend: string that contains the text to prepend to every
                    event object.
    """
    self._parser_context.SetTextPrepend(text_prepend)

  def SignalAbort(self):
    """Signals the worker to abort."""
    super(BaseEventExtractionWorker, self).SignalAbort()
    self._parser_context.SignalAbort()

  @classmethod
  def SupportsProfiling(cls):
    """Returns a boolean value to indicate if profiling is supported."""
    return hpy is not None