#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 The Plaso Project Authors.
# Please see the AUTHORS file for details on individual authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains Popularity Contest log file parser in plaso.

Information updated 20 january 2014.

From Debian Package Popularity Contest
Avery Pennarun

From 'http://www.unix.com/man-page/Linux/8/popularity-contest/':
'
  The popularity-contest command gathers information about Debian packages
  installed on the system, and prints the name of the most recently used
  executable program in that package as well as its last-accessed time
  (atime) and last-attribute-changed time (ctime) to stdout.

  When aggregated with the output of popularity-contest from many other
  systems, this information is valuable because it can be used to determine
  which Debian packages are commonly installed, used, or installed and never
  used. This helps Debian maintainers make decisions such as which packages
  should be installed by default on new systems.

  The resulting statistic is available from the project home page
  http://popcon.debian.org/.

  Normally, popularity-contest is run from a cron(8) job,
  /etc/cron.daily/popularity-contest, which automatically submits the
  results to Debian package maintainers (only once a week) according to the
  settings in /etc/popularity-contest.conf and
  /usr/share/popularity-contest/default.conf.
'

From 'http://popcon.ubuntu.com/README':
'
  The popularity-contest output looks like this:

    POPULARITY-CONTEST-0 TIME:914183330 ID:b92a5fc1809d8a95a12eb3a3c8445
    914183333 909868335 grep /bin/fgrep
    914183333 909868280 findutils /usr/bin/find
    914183330 909885698 dpkg-awk /usr/bin/dpkg-awk
    914183330 909868577 gawk /usr/bin/gawk
    [...more lines...]
    END-POPULARITY-CONTEST-0 TIME:914183335

  The first and last lines allow you to put more than one set of
  popularity-contest results into a single file and then split them up
  easily later.

  The rest of the lines are package entries, one line for each package
  installed on your system. They have the format:

    <atime> <ctime> <package-name> <mru-program> <tag>

  <package-name> is the name of the Debian package that contains
  <mru-program>. <mru-program> is the most recently used program, static
  library, or header (.h) file in the package.

  <atime> and <ctime> are the access time and creation time of the
  <mru-program> on your disk, respectively, represented as the number of
  seconds since midnight GMT on January 1, 1970 (i.e. in Unix time_t
  format). Linux updates <atime> whenever you open the file; <ctime> was
  set when you first installed the package.

  <tag> is determined by popularity-contest depending on <atime>, <ctime>,
  and the current date. <tag> can be RECENT-CTIME, OLD, or NOFILES.

  RECENT-CTIME means that atime is very close to ctime; it's impossible to
  tell whether the package was used recently or not, since <atime> is also
  updated when <ctime> is set. Normally, this happens because you have
  recently upgraded the package to a new version, resetting the <ctime>.

  OLD means that the <atime> is more than a month ago; you haven't used the
  package for more than a month.

  NOFILES means that no files in the package seemed to be programs, so
  <atime>, <ctime>, and <mru-program> are invalid.
'

REMARKS. The parser generates events solely based on the <atime> field and
does not use <ctime>, to reduce the generation of (possibly many) useless
events all sharing the same <ctime>. Indeed, that <ctime> can probably be
obtained from file system and/or package management logs. The <ctime> is
still reported in the log line.
"""

import logging

import pyparsing

from plaso.events import time_events
from plaso.lib import eventdata
from plaso.lib import timelib
from plaso.parsers import manager
from plaso.parsers import text_parser


__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'


class PopularityContestSessionEvent(time_events.PosixTimeEvent):
  """Convenience class for a Popularity Contest start/end event."""

  DATA_TYPE = 'popularity_contest:session:event'

  def __init__(self, timestamp, session, status, hostid=None, details=None):
    """Initializes the event object.

    Args:
      timestamp: seconds since epoch in UTC (POSIX time), it's the session
                 start/end time taken from the header/footer TIME field.
      session: the session number.
      status: start or end of the session.
      hostid: the host uuid.
      details: the popularity contest version and host architecture.
    """
    super(PopularityContestSessionEvent, self).__init__(
        timestamp, eventdata.EventTimestamp.ADDED_TIME)
    self.session = session
    self.status = status
    self.hostid = hostid
    self.details = details


class PopularityContestEvent(time_events.PosixTimeEvent):
  """Convenience class for a Popularity Contest line event."""

  DATA_TYPE = 'popularity_contest:log:event'

  def __init__(self, timestamp, ctime, package, mru, tag=None):
    """Initializes the event object.

    Args:
      timestamp: seconds since epoch in UTC (POSIX time), it's the <atime>.
      ctime: seconds since epoch in UTC, it's the <ctime>.
      package: the name of the installed package that mru belongs to.
      mru: the recently used app/library from package.
      tag: the popularity contest tag.
    """
    super(PopularityContestEvent, self).__init__(
        timestamp, eventdata.EventTimestamp.ACCESS_TIME)
    # TODO: adding ctime as is, reconsider a conversion to human readable form.
    self.ctime = ctime
    self.package = package
    self.mru = mru
    self.record_tag = tag


class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
  """Parse popularity contest log files."""

  NAME = 'popularity_contest'
  DESCRIPTION = u'Parser for popularity contest log files.'

  # Integer token, stored under the results name 'epoch' unless renamed.
  EPOCH = text_parser.PyparsingConstants.INTEGER.setResultsName('epoch')
  # Package name and most recently used program: one run of printable
  # (non-whitespace) characters each.
  PACKAGE = pyparsing.Word(pyparsing.printables).setResultsName('package')
  MRU = pyparsing.Word(pyparsing.printables).setResultsName('mru')
  # Tag delimited by angle brackets, e.g. <OLD>.
  TAG = pyparsing.QuotedString('<', endQuoteChar='>').setResultsName('tag')

  # Session start line:
  #   POPULARITY-CONTEST-<session> TIME:<epoch> ID:<32 alphanumerics> <rest>
  # Whatever follows the ID up to the end of the line is kept as 'details'.
  HEADER = (
      pyparsing.Literal(u'POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
      pyparsing.Literal(u'TIME:').suppress() + EPOCH +
      pyparsing.Literal('ID:').suppress() +
      pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName('id') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('details'))

  # Session end line: END-POPULARITY-CONTEST-<session> TIME:<epoch>
  FOOTER = (
      pyparsing.Literal(u'END-POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
      pyparsing.Literal(u'TIME:').suppress() + EPOCH)

  # Package line: <atime> <ctime> followed by either "<package> <tag>" (no
  # MRU program) or "<package> <mru> [<tag>]". Note that '+' binds tighter
  # than '|', so the alternation is between those two whole sequences.
  LOG_LINE = (
      EPOCH.setResultsName('atime') + EPOCH.setResultsName('ctime') +
      (PACKAGE + TAG | PACKAGE + MRU + pyparsing.Optional(TAG)))

  LINE_STRUCTURES = [
      ('logline', LOG_LINE),
      ('header', HEADER),
      ('footer', FOOTER),
  ]

  def VerifyStructure(self, parser_context, line):
    """Verify that this file is a Popularity Contest log file.

    The line must match the HEADER grammar and carry a header time that
    converts to a non-zero timestamp.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      line: A single line from the text file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
    try:
      header_struct = self.HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a Popularity Contest log file, invalid header')
      return False

    if not timelib.Timestamp.FromPosixTime(header_struct.epoch):
      logging.debug(u'Invalid Popularity Contest log file header timestamp.')
      return False

    return True

  def ParseRecord(self, parser_context, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      key: An identification string indicating the name of the parsed
           structure.
      structure: A pyparsing.ParseResults object from a line in the
                 log file.

    Returns:
      An event object (instance of EventObject) or None. None is returned
      for a header/footer without a valid epoch, for log lines that are
      skipped and for an unknown key.
    """
    # TODO: Add anomaly objects for abnormal timestamps, such as when the log
    # timestamp is greater than the session start.
    if key == 'logline':
      return self._ParseLogLine(structure)

    elif key == 'header':
      if not structure.epoch:
        logging.debug(u'PopularityContestParser, header with invalid epoch.')
        return
      return PopularityContestSessionEvent(
          structure.epoch, unicode(structure.session), u'start',
          structure.id, structure.details)

    elif key == 'footer':
      if not structure.epoch:
        logging.debug(u'PopularityContestParser, footer with invalid epoch.')
        return
      return PopularityContestSessionEvent(
          structure.epoch, unicode(structure.session), u'end')

    else:
      logging.warning(
          u'PopularityContestParser, unknown structure: {}.'.format(key))

  def _ParseLogLine(self, structure):
    """Gets an event_object or None from the pyparsing ParseResults.

    Args:
      structure: the pyparsing ParseResults object.

    Returns:
      event_object: a plaso event or None.
    """
    # Required fields are <mru> and <atime> and we are not interested in
    # log lines without <mru>.
    if not structure.mru:
      return
    # The <atime> field (as <ctime>) is always present but could be 0.
    # In case of <atime> equal to 0, we are in <NOFILES> case, safely return
    # without logging.
    if not structure.atime:
      return
    # TODO: not doing any check on <tag> fields, even if only informative
    # probably it could be better to check for the expected values.
    # TODO: ctime is a numeric string representing seconds since epoch UTC,
    # reconsider a conversion to integer together with microseconds usage.
    return PopularityContestEvent(
        structure.atime, structure.ctime, structure.package, structure.mru,
        structure.tag)


manager.ParsersManager.RegisterParser(PopularityContestParser)