265 lines
9.8 KiB
Python
265 lines
9.8 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2014 The Plaso Project Authors.
|
|
# Please see the AUTHORS file for details on individual authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""This file contains XChat log file parser in plaso.
|
|
|
|
Information updated 24 July 2013.
|
|
|
|
The parser applies to XChat log files. Despite their apparent
|
|
simplicity it's not straightforward to manage every possible case.
|
|
XChat tool allows users to specify how timestamp will be
|
|
encoded (using the strftime function), by letting them specify
|
|
additional separators. This parser will accept only the simplest
|
|
default English form of an XChat log file, as the following:
|
|
|
|
**** BEGIN LOGGING AT Mon Dec 31 21:11:55 2001
|
|
|
|
dec 31 21:11:55 --> You are now talking on #gugle
|
|
dec 31 21:11:55 --- Topic for #gugle is plaso, nobody knows what it means
|
|
dec 31 21:11:55 Topic for #gugle set by Kristinn
|
|
dec 31 21:11:55 --- Joachim gives voice to fpi
|
|
dec 31 21:11:55 * XChat here
|
|
dec 31 21:11:58 <fpi> ola plas-ing guys!
|
|
dec 31 21:12:00 <Kristinn> ftw!
|
|
|
|
The missing month/day case could be managed too, by extracting
|
|
the month/day information from the header. But the parser logic
|
|
would become intricate, since it would need to manage day transition,
|
|
chat lines crossing the midnight. From there derives the last day of
|
|
the year bug, since the parser will not manage that transition.
|
|
|
|
Moreover the strftime is locale-dependent, so month names, footer and
|
|
headers can change, even inside the same log file. That being said, the
|
|
following will be the main logic used to parse the log files (note that
|
|
the first header *must be* '**** BEGIN ...' otherwise file will be skipped).
|
|
|
|
1) Check for '****'
|
|
1.1) If 'BEGIN LOGGING AT' (English)
|
|
1.1.1) Extract the YEAR
|
|
1.1.2) Generate new event start logging
|
|
1.1.3) set parsing = True
|
|
1.2) If 'END LOGGING'
|
|
1.2.1) If parsing, set parsing=False
|
|
1.2.2) If not parsing, log debug
|
|
1.2.3) Generate new event end logging
|
|
1.3) If not BEGIN|END we are facing a different language
|
|
and we don't know which language!
|
|
If parsing is True, set parsing=False and log debug
|
|
2) Not '****' so we are parsing a line
|
|
2.1) If parsing = True, try to parse line and generate event
|
|
2.2) If parsing = False, skip until next good header is found
|
|
|
|
References
|
|
http://xchat.org
|
|
"""
|
|
|
|
import logging
|
|
|
|
import pyparsing
|
|
|
|
from plaso.events import time_events
|
|
from plaso.lib import eventdata
|
|
from plaso.lib import timelib
|
|
from plaso.parsers import manager
|
|
from plaso.parsers import text_parser
|
|
|
|
|
|
__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'
|
|
|
|
|
|
class XChatLogEvent(time_events.TimestampEvent):
  """Event object describing a single line of an XChat log file."""

  DATA_TYPE = 'xchat:log:line'

  def __init__(self, timestamp, text, nickname=None):
    """Builds the event from a timestamp, the line text and an optional nick.

    Args:
      timestamp: Microseconds since Epoch in UTC.
      text: The text sent by nickname or other text (server, messages, etc.).
      nickname: Optional nickname of the sender. The nickname attribute is
                only set on the event when this value is truthy.
    """
    timestamp_description = eventdata.EventTimestamp.ADDED_TIME
    super(XChatLogEvent, self).__init__(timestamp, timestamp_description)

    self.text = text

    # Lines without a sender (e.g. service messages) carry no nickname
    # attribute at all.
    if not nickname:
      return
    self.nickname = nickname
|
|
|
|
|
|
class XChatLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parse XChat log files.

  The parser keeps the year found in the last supported header in
  self.xchat_year, since body lines carry no year. A value of 0 means that
  no usable header is in effect and body lines are skipped.
  """

  NAME = 'xchatlog'
  DESCRIPTION = u'Parser for XChat log files.'

  ENCODING = 'UTF-8'

  # Common (header/footer/body) pyparsing structures.
  # TODO: Only English ASCII timestamp supported ATM, add support for others.
  # A single printable token that is matched but dropped from the results;
  # in HEADER it sits where the sample header has the day name ("Mon").
  IGNORE_STRING = pyparsing.Word(pyparsing.printables).suppress()
  # The word between the '****' signature and 'LOGGING AT' (3 to 5 chars),
  # compared against 'BEGIN' and 'END' in ParseRecord.
  LOG_ACTION = pyparsing.Word(
      pyparsing.printables, min=3, max=5).setResultsName('log_action')
  MONTH_NAME = pyparsing.Word(
      pyparsing.printables, exact=3).setResultsName('month_name')
  DAY = pyparsing.Word(pyparsing.nums, max=2).setParseAction(
      text_parser.PyParseIntCast).setResultsName('day')
  TIME = text_parser.PyparsingConstants.TIME.setResultsName('time')
  YEAR = text_parser.PyparsingConstants.YEAR.setResultsName('year')
  NICKNAME = pyparsing.QuotedString(
      u'<', endQuoteChar=u'>').setResultsName('nickname')
  TEXT = pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text')

  # Header/footer pyparsing structures.
  # Sample: "**** BEGIN LOGGING AT Mon Dec 31 21:11:55 2001".
  # Note that "BEGIN LOGGING" text is localized (default, English) and can be
  # different if XChat locale is different.
  HEADER_SIGNATURE = pyparsing.Keyword(u'****')
  HEADER = (
      HEADER_SIGNATURE.suppress() + LOG_ACTION +
      pyparsing.Keyword(u'LOGGING AT').suppress() + IGNORE_STRING +
      MONTH_NAME + DAY + TIME + YEAR)

  # Body (nickname, text and/or service messages) pyparsing structures.
  # Sample: "dec 31 21:11:58 <fpi> ola plas-ing guys!".
  LOG_LINE = MONTH_NAME + DAY + TIME + pyparsing.Optional(NICKNAME) + TEXT

  # Define the available log line structures.
  # The order matters: 'header_signature' is listed last so that it acts as
  # the fallback for headers that the full 'header' structure cannot match.
  LINE_STRUCTURES = [
      ('logline', LOG_LINE),
      ('header', HEADER),
      ('header_signature', HEADER_SIGNATURE),
  ]

  def __init__(self):
    """Initializes a XChatLog parser object."""
    super(XChatLogParser, self).__init__()
    self.offset = 0
    # Year taken from the last supported header; 0 while no header is active.
    self.xchat_year = 0

  def _GetTimestamp(self, parse_result, timezone, year=0):
    """Determines the timestamp from the pyparsing ParseResults.

    When no year is passed in, the year is read from the parse result (the
    header case) and remembered in self.xchat_year for the following lines.

    Args:
      parse_result: The pyparsing ParseResults object.
      timezone: The timezone object.
      year: Optional current year. The default is 0.

    Returns:
      A timelib timestamp or 0.
    """
    month = timelib.MONTH_DICT.get(parse_result.month_name.lower(), None)
    if not month:
      logging.debug(u'XChatLog unmanaged month name [{0:s}]'.format(
          parse_result.month_name))
      return 0

    hour, minute, second = parse_result.time
    if not year:
      # This condition could happen when parsing the header line: if unable
      # to get a valid year, returns a '0' timestamp, thus preventing any
      # log line parsing (since xchat_year is unset to '0') until a new good
      # (it means supported) header with a valid year information is found.
      # TODO: reconsider this behaviour.
      year = parse_result.get('year', 0)

      if not year:
        return 0

      self.xchat_year = year

    day = parse_result.get('day', 0)
    return timelib.Timestamp.FromTimeParts(
        year, month, day, hour, minute, second, timezone=timezone)

  def VerifyStructure(self, parser_context, line):
    """Verify that this file is a XChat log file.

    The line has to match the (English) header structure and yield a usable
    timestamp.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      line: A single line from the text file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
    try:
      parse_result = self.HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Unable to parse, not a valid XChat log file header')
      return False

    timestamp = self._GetTimestamp(parse_result, parser_context.timezone)
    if not timestamp:
      # parse_result is a ParseResults object, not a string: use the !s
      # conversion since a non-empty format specification such as ':s' is
      # rejected with a TypeError by object.__format__ on Python 3.4+.
      logging.debug(u'Wrong XChat timestamp: {0!s}'.format(parse_result))
      return False

    # Unset the xchat_year since we are only verifying structure.
    # The value gets set in _GetTimestamp during the actual parsing.
    self.xchat_year = 0
    return True

  def ParseRecord(self, parser_context, key, structure):
    """Parse each record structure and return an event object if applicable.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      key: An identification string indicating the name of the parsed
           structure.
      structure: A pyparsing.ParseResults object from a line in the
                 log file.

    Returns:
      An event object (instance of EventObject) or None.
    """
    if key == 'logline':
      # Body lines have no year of their own; without a year from a
      # supported header they cannot be placed in time and are skipped.
      if not self.xchat_year:
        logging.debug(u'XChatLogParser, missing year information.')
        return

      timestamp = self._GetTimestamp(
          structure, parser_context.timezone, year=self.xchat_year)
      if not timestamp:
        logging.debug(u'XChatLogParser, cannot get timestamp from line.')
        return

      # The text string contains multiple unnecessary whitespaces that need to
      # be removed, thus the split and re-join.
      return XChatLogEvent(
          timestamp, u' '.join(structure.text.split()), structure.nickname)

    elif key == 'header':
      timestamp = self._GetTimestamp(structure, parser_context.timezone)
      if not timestamp:
        logging.warning(u'XChatLogParser, cannot get timestamp from header.')
        return

      if structure.log_action == u'BEGIN':
        return XChatLogEvent(timestamp, u'XChat start logging')

      elif structure.log_action == u'END':
        # End logging, unset year.
        self.xchat_year = 0
        return XChatLogEvent(timestamp, u'XChat end logging')

      else:
        logging.warning(u'Unknown log action: {0:s}.'.format(
            structure.log_action))

    elif key == 'header_signature':
      # If this key is matched (after others keys failed) we got a different
      # localized header and we should stop parsing until a new good header
      # is found. Stop parsing is done setting xchat_year to 0.
      # Note that the code assumes that LINE_STRUCTURES will be used in the
      # exact order as defined!
      logging.warning(u'Unknown locale header.')
      self.xchat_year = 0

    else:
      logging.warning(
          u'Unable to parse record, unknown structure: {0:s}'.format(key))
|
|
|
|
|
|
# Register the XChat log parser class with the parsers manager when this
# module is loaded.
manager.ParsersManager.RegisterParser(XChatLogParser)
|