plaso-rubanetra/plaso/lib/utils.py
2020-04-06 18:48:34 +02:00

200 lines
5.6 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2013 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains utility functions."""
import logging
from plaso.lib import errors
from plaso.lib import lexer
# Names treated as reserved by this module's users.
# NOTE(review): presumably these are built-in event attribute names that
# must not be reused for custom attributes -- confirm against the callers.
RESERVED_VARIABLES = frozenset([
    'body',
    'data_type',
    'display_name',
    'filename',
    'hostname',
    'http_headers',
    'inode',
    'mapped_files',
    'metadata',
    'offset',
    'parser',
    'pathspec',
    'query',
    'regvalue',
    'source_long',
    'source_short',
    'store_index',
    'store_number',
    'tag',
    'timestamp',
    'timestamp_desc',
    'timezone',
    'username',
    'uuid',
])
def IsText(bytes_in, encoding=None):
    """Examine the bytes in and determine if they are indicative of a text.

    Parsers need a quick and at least semi-reliable method of discovering
    whether or not a particular byte stream is text or resembles text. This
    can be used in text parsers to determine if a file is a text file or not,
    for instance.

    The method assumes the byte sequence is either ASCII, UTF-8 or the
    caller supplied character encoding. Otherwise it makes the assumption
    that the byte sequence is not text. UTF-16 is currently NOT tried, see
    the TODO in the body.

    Args:
      bytes_in: The byte sequence passed to the method that needs examination.
          An already decoded (unicode) string is always considered text.
      encoding: Optional encoding to test, if not defined only ASCII and
          UTF-8 are tried.

    Returns:
      Boolean value indicating whether or not the byte sequence is a text.
      An empty sequence is reported as text.
    """
    # TODO: Improve speed and accuracy of this method.

    # Resolve the text type locally so the check works both where the
    # "unicode" built-in exists (Python 2) and where it does not (Python 3).
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # Already decoded text is text; isinstance also covers subclasses.
    if isinstance(bytes_in, text_type):
        return True

    # Fast path: only printable ASCII. Iterating a bytearray yields integers
    # on every Python version, so no ord() call is needed.
    if all(31 < byte < 128 for byte in bytearray(bytes_in)):
        return True

    # Check if this is UTF-8.
    try:
        bytes_in.decode('utf-8')
        return True
    except UnicodeDecodeError:
        pass

    # TODO: UTF-16 decoding (e.g. 'utf-16-le') is successful in too many
    # edge cases where we are not really dealing with a text at all. It is
    # left out for now; consider re-enabling it or making a better
    # determination.

    if encoding:
        try:
            bytes_in.decode(encoding)
            return True
        except UnicodeDecodeError:
            pass
        except LookupError:
            # The caller supplied an encoding name Python does not know.
            logging.error(
                u'String encoding not recognized: {0:s}'.format(encoding))

    return False
def GetBaseName(path):
    """Return the basename of a path (could be Windows or *NIX separated).

    The separator is guessed by counting occurrences: the forward slash is
    used only when it appears more often than the backslash. On a tie, or
    when the path contains no separator at all, the backslash is used; a
    path without any separator is therefore returned unchanged.

    Args:
      path: The path string to split.

    Returns:
      The part of the path after the last separator, which is an empty
      string when the path ends with that separator.
    """
    # The original guard "'/' and '\\' in path" only ever tested for the
    # backslash since '/' is always true. Comparing the counts directly
    # covers the mixed, single separator and no separator cases alike.
    forward_count = path.count('/')
    backward_count = path.count('\\')

    separator = '/' if forward_count > backward_count else '\\'
    _, _, base = path.rpartition(separator)
    return base
def GetUnicodeString(string):
    """Convert the value to a Unicode string if necessary.

    Args:
      string: The value to convert. Unicode strings are returned as-is, byte
          strings are decoded as UTF-8 and any other object is converted
          with str() first.

    Returns:
      A Unicode string. Bytes that are not valid UTF-8 are silently dropped.
    """
    # Resolve the text type locally so the function works both where the
    # "unicode" built-in exists (Python 2) and where it does not (Python 3).
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # isinstance, unlike an exact type comparison, also accepts subclasses
    # of the text type, which must not be pushed through str().
    if isinstance(string, text_type):
        return string

    if not isinstance(string, (bytes, bytearray)):
        string = str(string)
        # Where str() already produces text there is nothing left to decode.
        if isinstance(string, text_type):
            return string

    return bytes(string).decode('utf8', 'ignore')
class PathReplacer(lexer.Lexer):
    """Lexer that expands {attribute} placeholders found in a path.

    A placeholder of the form {name} is substituted with the attribute "name"
    of the preprocessing object, {{name}} is emitted as the literal text
    {name} and every other character is copied over unchanged.
    """

    # Each token maps a regular expression onto the name of the method of
    # this class that handles the match.
    # NOTE(review): the first and last arguments are presumably the lexer
    # state and the next state, and the double brace pattern presumably has
    # to come first to win over the single brace one -- confirm against
    # lexer.Token and lexer.Lexer.
    tokens = [
        lexer.Token('.', '{{([^}]+)}}', 'ReplaceVariable', ''),
        lexer.Token('.', '{([^}]+)}', 'ReplaceString', ''),
        lexer.Token('.', '([^{])', 'ParseString', ''),
    ]

    def __init__(self, pre_obj, data=''):
        """Initialize the path replacer.

        Args:
          pre_obj: The object whose attributes supply the replacement values.
          data: Optional path string to process, handed to the lexer.
        """
        super(PathReplacer, self).__init__(data)
        self._pre_obj = pre_obj
        # Pieces of the resulting path, joined together in GetPath.
        self._path = []

    def GetPath(self):
        """Run the lexer over the data and return the resulting path.

        Returns:
          A Unicode string built from all the pieces collected by the token
          handlers.

        Raises:
          PathNotFound: propagated from ReplaceString when a placeholder has
              no usable value.
        """
        # Always read at least one token, then continue until the lexer
        # reports that it is empty.
        self.NextToken()
        while not self.Empty():
            self.NextToken()

        return u''.join(self._path)

    def ParseString(self, match, **_):
        """Copy the matched character into the path unchanged."""
        character = match.group(1)
        self._path.append(character)

    def ReplaceVariable(self, match, **_):
        """Emit an escaped {{name}} placeholder as the literal text {name}."""
        literal = u'{{{0:s}}}'.format(match.group(1))
        self._path.append(literal)

    def ReplaceString(self, match, **_):
        """Substitute a {name} placeholder with the matching attribute value.

        Raises:
          PathNotFound: if the preprocessing object has no such attribute or
              the attribute value is empty (evaluates to false).
        """
        name = match.group(1)
        value = getattr(self._pre_obj, name, None)
        if not value:
            raise errors.PathNotFound(
                u'Path variable: {} not discovered yet.'.format(name))

        self._path.append(value)
def GetInodeValue(inode_raw):
    """Read in a 'raw' inode value and try to convert it into an integer.

    Values that cannot be converted directly, such as the string
    '128-144-1', are retried using only the part in front of the first dash.

    Args:
      inode_raw: A string or an int inode value. Floats are truncated.

    Returns:
      An integer inode value or -1 if the value cannot be converted.
    """
    # int() already returns integers unchanged and truncates floats, so no
    # separate type checks are needed. TypeError (e.g. None) and
    # OverflowError (infinity) are caught as well so that unusable values
    # fall through to the -1 result instead of raising.
    try:
        return int(inode_raw)
    except (OverflowError, TypeError, ValueError):
        pass

    # Let's do one more attempt with the part before the first dash. The
    # string conversion is kept inside the try block since it can raise a
    # ValueError subclass (UnicodeEncodeError) as well.
    try:
        inode_string, _, _ = str(inode_raw).partition('-')
        return int(inode_string)
    except ValueError:
        return -1