210 lines
7.0 KiB
Python
Raw Normal View History

2020-04-06 18:48:34 +02:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2012 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains a syslog parser in plaso."""
import datetime
import logging
from plaso.events import text_events
from plaso.lib import lexer
from plaso.lib import timelib
from plaso.lib import utils
from plaso.parsers import manager
from plaso.parsers import text_parser
class SyslogLineEvent(text_events.TextEvent):
    """Text event that represents a single syslog line."""

    # Data type identifier attached to events of this class.
    DATA_TYPE = 'syslog:line'
class SyslogParser(text_parser.SlowLexicalTextParser):
    """Parse text based syslog files.

    The token table below only captures a month, a day and a time for each
    line, so the year is not taken from the line itself. It is taken from
    the parser context or, failing that, from the file's stat information,
    and is incremented whenever the month of a line is lower than the month
    of the previous line.
    """

    NAME = 'syslog'
    DESCRIPTION = u'Parser for syslog files.'

    # Year used when no year can be determined at all, and by PrintLine.
    # TODO: Make this sensible, not have the year permanent.
    _DEFAULT_YEAR = 2012

    # TODO: can we change this similar to SQLite where we create an
    # event specific object for different lines using a callback function.

    # Define the tokens that make up the structure of a syslog file.
    # NOTE(review): the arguments appear to be (state, regular expression,
    # callback name, next state); callbacks that are not defined in this
    # class (SetMonth, SetDay, SetTime, ParseMessage, ParseIncomplete) are
    # presumably provided by the parent class - confirm against lexer.Token.
    tokens = [
        lexer.Token(
            'INITIAL',
            '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ',
            'SetMonth', 'DAY'),
        lexer.Token('DAY', r'\s?(\d{1,2})\s+', 'SetDay', 'TIME'),
        lexer.Token('TIME', r'([0-9:\.]+) ', 'SetTime', 'STRING_HOST'),
        lexer.Token('STRING_HOST', r'^--(-)', 'ParseHostname', 'STRING'),
        lexer.Token(
            'STRING_HOST', r'([^\s]+) ', 'ParseHostname', 'STRING_PID'),
        lexer.Token('STRING_PID', r'([^\:\n]+)', 'ParsePid', 'STRING'),
        lexer.Token('STRING', r'([^\n]+)', 'ParseString', ''),
        lexer.Token('STRING', r'\n\t', None, ''),
        lexer.Token('STRING', r'\t', None, ''),
        lexer.Token('STRING', r'\n', 'ParseMessage', 'INITIAL'),
        lexer.Token('.', '([^\n]+)\n', 'ParseIncomplete', 'INITIAL'),
        lexer.Token('.', '\n[^\t]', 'ParseIncomplete', 'INITIAL'),
        lexer.Token('S[.]+', '(.+)', 'ParseString', ''),
    ]

    def __init__(self):
        """Initializes a syslog parser object."""
        super(SyslogParser, self).__init__(local_zone=True)
        # The year is resolved lazily in ParseLine; 0 means "not known yet".
        self._year_use = 0
        # Month of the previously parsed line, used to detect a new year.
        self._last_month = 0
        # Set some additional attributes.
        self.attributes['reporter'] = ''
        self.attributes['pid'] = ''

    def _GetYear(self, stat, timezone):
        """Retrieves the year either from the input file or from the settings.

        Args:
          stat: The stat object of the file. Its crtime attribute is used
                when set, otherwise its ctime attribute. The value is passed
                to datetime.datetime.fromtimestamp.
          timezone: The tzinfo object passed to
                    datetime.datetime.fromtimestamp.

        Returns:
          The year of the file timestamp, or the current year (as returned
          by timelib.GetCurrentYear) when the stat object has no usable
          timestamp or the timestamp cannot be converted.
        """
        file_time = getattr(stat, 'crtime', 0) or getattr(stat, 'ctime', 0)
        if not file_time:
            current_year = timelib.GetCurrentYear()
            logging.error((
                u'Unable to determine year of syslog file.\nDefaulting to: '
                u'{0:d}').format(current_year))
            return current_year

        try:
            timestamp = datetime.datetime.fromtimestamp(file_time, timezone)
        except (ValueError, OverflowError, OSError) as exception:
            # Depending on the Python version an out of range timestamp is
            # reported as ValueError, OverflowError or OSError.
            current_year = timelib.GetCurrentYear()
            # Use the !s conversion: formatting an exception object with a
            # ":s" format specification raises TypeError on Python 3.
            logging.error(
                u'Unable to determine year of syslog file with error: {0!s}\n'
                u'Defaulting to: {1:d}'.format(exception, current_year))
            return current_year

        return timestamp.year

    def ParseLine(self, parser_context):
        """Parse a single line from the syslog file.

        This method extends the one from TextParser slightly, adding
        the context of the reporter and pid values found inside syslog
        files.

        Args:
          parser_context: A parser context object (instance of ParserContext).

        Returns:
          An event object (instance of TextEvent).
        """
        # Note: this is an older comment that applied to a similar approach
        # which previously lived in the init function.
        # TODO: this is a HACK to get the tests working let's discuss this.
        if not self._year_use:
            self._year_use = parser_context.year

        if not self._year_use:
            # TODO: Find a decent way to actually calculate the correct year
            # from the syslog file, instead of relying on stats object.
            stat = self.file_entry.GetStat()
            self._year_use = self._GetYear(stat, parser_context.timezone)

            if not self._year_use:
                self._year_use = self._DEFAULT_YEAR

        # A month lower than that of the previous line is treated as the
        # start of a new year. A month value of 0 never triggers this.
        month = int(self.attributes['imonth'])
        if month and self._last_month > month:
            self._year_use += 1
        self._last_month = month

        self.attributes['iyear'] = self._year_use

        return super(SyslogParser, self).ParseLine(parser_context)

    def ParseHostname(self, match=None, **unused_kwargs):
        """Parses the hostname.

        This is a callback function for the text parser (lexer) and is
        called by the STRING_HOST lexer state.

        Args:
          match: The regular expression match object.
        """
        self.attributes['hostname'] = match.group(1)

    def ParsePid(self, match=None, **unused_kwargs):
        """Parses the process identifier (PID).

        This is a callback function for the text parser (lexer) and is
        called by the STRING_PID lexer state.

        A value of the form "reporter[pid]" is split on the last "[": the
        text before it becomes the reporter and the text between the
        brackets becomes the PID, or 0 when that text is not an integer.
        Any other value is stored as the reporter in its entirety and the
        pid attribute is left untouched.

        Args:
          match: The regular expression match object.
        """
        # TODO: Change this logic and rather add more Tokens that
        # fully cover all variations of the various PID stages.
        line = match.group(1)

        if line.endswith(']'):
            # Split on the last "[" so that a reporter name which itself
            # contains "[" is kept intact.
            reporter, separator, pid = line[:-1].rpartition('[')
        else:
            separator = ''

        if not separator:
            # No "[...]" suffix (this includes a stray trailing "]" without
            # an opening bracket): the whole value is the reporter.
            self.attributes['reporter'] = line
            return

        self.attributes['reporter'] = reporter
        try:
            self.attributes['pid'] = int(pid)
        except ValueError:
            self.attributes['pid'] = 0

    def ParseString(self, match=None, **unused_kwargs):
        """Parses a (body text) string.

        This is a callback function for the text parser (lexer) and is
        called by the STRING lexer state.

        Args:
          match: The regular expression match object.
        """
        self.attributes['body'] += utils.GetUnicodeString(match.group(1))

    def PrintLine(self):
        """Prints a log line.

        Returns:
          The return value of the PrintLine method of the parent class.
        """
        # The year is forced to the fixed default before delegating.
        self.attributes['iyear'] = self._DEFAULT_YEAR
        return super(SyslogParser, self).PrintLine()

    # TODO: this is a rough initial implementation to get this working.
    def CreateEvent(self, timestamp, offset, attributes):
        """Creates a syslog line event.

        This overrides the default function in TextParser to create
        syslog line events instead of text events.

        Args:
          timestamp: The timestamp time value. The timestamp contains the
                     number of microseconds since Jan 1, 1970 00:00:00 UTC.
          offset: The offset of the event.
          attributes: A dict that contains the events attributes.

        Returns:
          A text event (SyslogLineEvent).
        """
        return SyslogLineEvent(timestamp, offset, attributes)
# Register the syslog parser with the parsers manager when this module is
# imported.
manager.ParsersManager.RegisterParser(SyslogParser)