222 lines
7.8 KiB
Python
222 lines
7.8 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2013 The Plaso Project Authors.
|
|
# Please see the AUTHORS file for details on individual authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""This file contains a plugin for parsing Google Analytics cookies."""
|
|
|
|
import urllib
|
|
|
|
from plaso.events import time_events
|
|
from plaso.lib import errors
|
|
from plaso.lib import eventdata
|
|
from plaso.parsers.cookie_plugins import interface
|
|
|
|
|
|
class GoogleAnalyticsEvent(time_events.PosixTimeEvent):
  """Event object for a timestamp taken from a Google Analytics cookie."""

  DATA_TYPE = u'cookie:google:analytics'

  def __init__(
      self, timestamp, timestamp_desc, url, data_type_append, cookie_name,
      **kwargs):
    """Initializes the Google Analytics event object.

    Args:
      timestamp: The timestamp in a POSIX format.
      timestamp_desc: A string describing the timestamp.
      url: The full URL where the cookie got set.
      data_type_append: String that is appended, after a colon, to DATA_TYPE
                        to form the data type handed to the parent class.
      cookie_name: The name of the cookie.
      kwargs: Additional values; each one is stored on the event object as
              an attribute named after its keyword.
    """
    # Build the full data type first, e.g. DATA_TYPE + u':' + u'utmz'.
    data_type = u'{0:s}:{1:s}'.format(self.DATA_TYPE, data_type_append)
    super(GoogleAnalyticsEvent, self).__init__(
        timestamp, timestamp_desc, data_type)

    self.url = url
    self.cookie_name = cookie_name

    # Expose every extra keyword argument as an attribute of the event.
    for attribute_name, attribute_value in kwargs.iteritems():
      setattr(self, attribute_name, attribute_value)
|
|
|
|
|
|
class GoogleAnalyticsUtmzPlugin(interface.CookiePlugin):
  """A browser cookie plugin for Google Analytics __utmz cookies."""

  NAME = 'google_analytics_utmz'

  COOKIE_NAME = u'__utmz'

  # Point to few sources for URL information.
  URLS = [
      (u'http://www.dfinews.com/articles/2012/02/'
       u'google-analytics-cookies-and-forensic-implications')]

  def GetEntries(
      self, parser_context, file_entry=None, parser_chain=None,
      cookie_data=None, url=None, **unused_kwargs):
    """Extracts event objects from the cookie.

    A single event, with the last visited time as its timestamp, is produced
    through the parser context.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
                  The default is None.
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.
      cookie_data: The cookie data, as a byte string.
      url: The full URL or path where the cookie got set.

    Raises:
      WrongPlugin: if the cookie data does not consist of 5 fields or if the
                   last time, sessions or sources field is not an integer.
    """
    # The structure of the field:
    #   <domain hash>.<last time>.<sessions>.<sources>.<variables>
    fields = cookie_data.split('.')

    # The variables field can contain dots itself, in which case everything
    # from the fifth field onwards is joined back into a single field.
    if len(fields) > 5:
      variables = u'.'.join(fields[4:])
      fields = fields[0:4]
      fields.append(variables)

    if len(fields) != 5:
      raise errors.WrongPlugin(u'Wrong number of fields. [{0:d} vs. 5]'.format(
          len(fields)))

    domain_hash, last, sessions, sources, variables = fields

    # Validate the numeric fields up front so that a malformed cookie is
    # reported the same way as a wrong field count, instead of escaping as a
    # bare ValueError.
    try:
      last_visited_time = int(last, 10)
      number_of_sessions = int(sessions, 10)
      number_of_sources = int(sources, 10)
    except ValueError:
      raise errors.WrongPlugin(
          u'Unable to parse the time, sessions or sources field as an '
          u'integer.')

    # The variables are separated by pipes and are of the form: key=value
    # NOTE(review): a variable whose key equals one of the explicit arguments
    # passed to GoogleAnalyticsEvent below (e.g. sessions) would make that
    # call raise a TypeError; not changed here.
    kwargs = {}
    for variable in variables.split(u'|'):
      key, _, value = variable.partition(u'=')
      try:
        # Encode explicitly as UTF-8: the implicit ASCII conversion done by
        # str() raises UnicodeEncodeError on non-ASCII characters. For ASCII
        # only values the resulting byte string is the same.
        unquoted_value = urllib.unquote(value.encode('utf-8'))
        value_line = unicode(unquoted_value, 'utf-8')
      except UnicodeError:
        # UnicodeError covers both encode and decode failures; fall back to
        # a printable representation of the raw value.
        value_line = repr(value)

      kwargs[key] = value_line

    event_object = GoogleAnalyticsEvent(
        last_visited_time, eventdata.EventTimestamp.LAST_VISITED_TIME,
        url, 'utmz', self.COOKIE_NAME, domain_hash=domain_hash,
        sessions=number_of_sessions, sources=number_of_sources,
        **kwargs)
    parser_context.ProduceEvent(
        event_object, parser_chain=parser_chain, file_entry=file_entry)
|
|
|
|
|
|
class GoogleAnalyticsUtmaPlugin(interface.CookiePlugin):
  """A browser cookie plugin for Google Analytics __utma cookies."""

  NAME = 'google_analytics_utma'

  COOKIE_NAME = u'__utma'

  # Point to few sources for URL information.
  URLS = [
      (u'http://www.dfinews.com/articles/2012/02/'
       u'google-analytics-cookies-and-forensic-implications')]

  def GetEntries(
      self, parser_context, file_entry=None, parser_chain=None,
      cookie_data=None, url=None, **unused_kwargs):
    """Extracts event objects from the cookie.

    Three events are produced through the parser context, one for each of
    the first visit, previous visit and last visit timestamps.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
                  The default is None.
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.
      cookie_data: The cookie data, as a byte string.
      url: The full URL or path where the cookie got set.

    Raises:
      WrongPlugin: if the cookie data does not consist of 6 fields or if one
                   of the timestamp fields or the sessions field is not an
                   integer.
    """
    # Values has the structure of:
    #   <domain hash>.<visitor ID>.<first visit>.<previous>.<last>.<# of
    #   sessions>
    fields = cookie_data.split(u'.')

    # Check for a valid record.
    if len(fields) != 6:
      raise errors.WrongPlugin(u'Wrong number of fields. [{0:d} vs. 6]'.format(
          len(fields)))

    domain_hash, visitor_id, first_visit, previous, last, sessions = fields

    # Parse every numeric field once and before any event is produced, so
    # that a malformed cookie yields no partial output and is reported the
    # same way as a wrong field count, instead of escaping as a bare
    # ValueError.
    # TODO: Double check this time is stored in UTC and not local time.
    try:
      first_epoch = int(first_visit, 10)
      previous_epoch = int(previous, 10)
      last_epoch = int(last, 10)
      number_of_sessions = int(sessions, 10)
    except ValueError:
      raise errors.WrongPlugin(
          u'Unable to parse a timestamp or the sessions field as an '
          u'integer.')

    event_object = GoogleAnalyticsEvent(
        first_epoch, 'Analytics Creation Time', url, 'utma', self.COOKIE_NAME,
        domain_hash=domain_hash, visitor_id=visitor_id,
        sessions=number_of_sessions)
    parser_context.ProduceEvent(
        event_object, parser_chain=parser_chain, file_entry=file_entry)

    event_object = GoogleAnalyticsEvent(
        previous_epoch, 'Analytics Previous Time', url, 'utma',
        self.COOKIE_NAME, domain_hash=domain_hash, visitor_id=visitor_id,
        sessions=number_of_sessions)
    parser_context.ProduceEvent(
        event_object, parser_chain=parser_chain, file_entry=file_entry)

    event_object = GoogleAnalyticsEvent(
        last_epoch, eventdata.EventTimestamp.LAST_VISITED_TIME,
        url, 'utma', self.COOKIE_NAME, domain_hash=domain_hash,
        visitor_id=visitor_id, sessions=number_of_sessions)
    parser_context.ProduceEvent(
        event_object, parser_chain=parser_chain, file_entry=file_entry)
|
|
|
|
|
|
class GoogleAnalyticsUtmbPlugin(interface.CookiePlugin):
  """A browser cookie plugin for Google Analytics __utmb cookies."""

  NAME = 'google_analytics_utmb'

  COOKIE_NAME = u'__utmb'

  # Point to few sources for URL information.
  URLS = [
      (u'http://www.dfinews.com/articles/2012/02/'
       u'google-analytics-cookies-and-forensic-implications')]

  def GetEntries(
      self, parser_context, file_entry=None, parser_chain=None,
      cookie_data=None, url=None, **unused_kwargs):
    """Extracts event objects from the cookie.

    A single event, with the last visited time as its timestamp, is produced
    through the parser context.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
                  The default is None.
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.
      cookie_data: The cookie data, as a byte string.
      url: The full URL or path where the cookie got set.

    Raises:
      WrongPlugin: if the cookie data does not consist of 4 fields or if the
                   pages viewed or last time field is not an integer.
    """
    # Values has the structure of:
    #   <domain hash>.<pages viewed>.10.<last time>
    fields = cookie_data.split(u'.')

    # Check for a valid record.
    if len(fields) != 4:
      raise errors.WrongPlugin(u'Wrong number of fields. [{0:d} vs. 4]'.format(
          len(fields)))

    # The third field is not used.
    domain_hash, pages_viewed, _, last = fields

    # Validate the numeric fields so that a malformed cookie is reported the
    # same way as a wrong field count, instead of escaping as a bare
    # ValueError.
    try:
      last_visited_time = int(last, 10)
      number_of_pages_viewed = int(pages_viewed, 10)
    except ValueError:
      raise errors.WrongPlugin(
          u'Unable to parse the pages viewed or time field as an integer.')

    event_object = GoogleAnalyticsEvent(
        last_visited_time, eventdata.EventTimestamp.LAST_VISITED_TIME,
        url, 'utmb', self.COOKIE_NAME, domain_hash=domain_hash,
        pages_viewed=number_of_pages_viewed)
    parser_context.ProduceEvent(
        event_object, parser_chain=parser_chain, file_entry=file_entry)
|