237 lines
9.3 KiB
Python
237 lines
9.3 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2013 The Plaso Project Authors.
|
|
# Please see the AUTHORS file for details on individual authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the 'License');
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Parser for Java Cache IDX files."""
|
|
|
|
# TODO:
# * 6.02 files do not retain IP addresses. However, the
#   deploy_resource_codebase header field may contain the host IP.
#   This needs to be researched further, as that field may not always
#   be present. Until then, 6.02 files report the IP address as 'Unknown'.
|
|
import construct
|
|
|
|
from plaso.events import time_events
|
|
from plaso.lib import errors
|
|
from plaso.lib import eventdata
|
|
from plaso.lib import timelib
|
|
from plaso.parsers import interface
|
|
from plaso.parsers import manager
|
|
|
|
|
|
class JavaIDXEvent(time_events.TimestampEvent):
  """Event describing a download recorded in a Java IDX cache file.

  Attributes:
    idx_version: Version of the IDX file.
    ip_address: IP address of the host in the URL.
    url: URL of the downloaded file.
  """

  DATA_TYPE = 'java:download:idx'

  def __init__(
      self, timestamp, timestamp_description, idx_version, url, ip_address):
    """Creates the event and stores the IDX specific values on it.

    Args:
      timestamp: The timestamp value.
      timestamp_description: The description of the usage of the time value.
      idx_version: Version of IDX file.
      url: URL of the downloaded file.
      ip_address: IP address of the host in the URL.
    """
    # The base class takes care of the timestamp and its description.
    super(JavaIDXEvent, self).__init__(timestamp, timestamp_description)

    # Values that are specific to the IDX file format.
    self.ip_address = ip_address
    self.url = url
    self.idx_version = idx_version
|
|
|
|
|
|
class JavaIDXParser(interface.BaseParser):
  """Parse Java IDX files for download events.

  There are five structures defined. 6.02 files had one generic section
  that retained all data. From 6.03, the file went to a multi-section
  format where later sections were optional and had variable-lengths.
  6.03, 6.04, and 6.05 files all have their main data section (#2)
  begin at offset 128. The short structure is because 6.05 files
  deviate after the 8th byte. So, grab the first 8 bytes to ensure it's
  valid, get the file version, then continue on with the correct
  structures.
  """

  NAME = 'java_idx'
  DESCRIPTION = u'Parser for Java IDX files.'

  # IDX file versions this parser knows how to read.
  _SUPPORTED_VERSIONS = frozenset([602, 603, 604, 605])

  # Header shared by all versions; used to validate the file and to
  # determine which of the version specific structures applies.
  IDX_SHORT_STRUCT = construct.Struct(
      'magic',
      construct.UBInt8('busy'),
      construct.UBInt8('incomplete'),
      construct.UBInt32('idx_version'))

  # The single data section of a 6.02 file.
  IDX_602_STRUCT = construct.Struct(
      'IDX_602_Full',
      construct.UBInt16('null_space'),
      construct.UBInt8('shortcut'),
      construct.UBInt32('content_length'),
      construct.UBInt64('last_modified_date'),
      construct.UBInt64('expiration_date'),
      construct.PascalString(
          'version_string', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'url', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'namespace', length_field=construct.UBInt16('length')),
      construct.UBInt32('FieldCount'))

  # First (fixed size) section of 6.03, 6.04 and 6.05 files.
  IDX_605_SECTION_ONE_STRUCT = construct.Struct(
      'IDX_605_Section1',
      construct.UBInt8('shortcut'),
      construct.UBInt32('content_length'),
      construct.UBInt64('last_modified_date'),
      construct.UBInt64('expiration_date'),
      construct.UBInt64('validation_date'),
      construct.UBInt8('signed'),
      construct.UBInt32('sec2len'),
      construct.UBInt32('sec3len'),
      construct.UBInt32('sec4len'))

  # Second (variable size) section of 6.03, 6.04 and 6.05 files.
  IDX_605_SECTION_TWO_STRUCT = construct.Struct(
      'IDX_605_Section2',
      construct.PascalString(
          'version', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'url', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'namespec', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'ip_address', length_field=construct.UBInt16('length')),
      construct.UBInt32('FieldCount'))

  # Java uses Pascal-style strings, but with a 2-byte length field.
  JAVA_READUTF_STRING = construct.Struct(
      'Java.ReadUTF',
      construct.PascalString(
          'string', length_field=construct.UBInt16('length')))

  def Parse(self, parser_context, file_entry, parser_chain=None):
    """Extract data from a Java cache IDX file.

    This is the main parsing engine for the parser. It determines if
    the selected file is a proper IDX file. It then checks the file
    version to determine the correct structure to apply to extract
    data.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      parser_chain: Optional string containing the parsing chain up to this
                    point. The default is None.

    Raises:
      UnableToParseFile: when the file is not a supported Java IDX file or
                         when its content cannot be read.
    """
    file_object = file_entry.GetFileObject()
    try:
      self._ParseIDXFileObject(
          parser_context, file_entry, file_object, parser_chain)
    finally:
      # Release the file object on success as well as on every error path.
      file_object.close()

  def _ParseIDXFileObject(
      self, parser_context, file_entry, file_object, parser_chain):
    """Validates an opened IDX file and produces its events.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: The file-like object of the file entry, positioned at
                   the start of the file.
      parser_chain: String containing the parsing chain up to this point
                    or None.

    Raises:
      UnableToParseFile: when the file is not a supported Java IDX file or
                         when its content cannot be read.
    """
    # The !s conversion is used since an exception object does not accept
    # a string format specification on every Python version.
    read_error = u'Unable to parse Java IDX file with error: {0!s}.'

    try:
      magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(read_error.format(exception))

    # Fields magic.busy and magic.incomplete are normally 0x00. They
    # are set to 0x01 if the file is currently being downloaded. Logic
    # checks for > 1 to avoid a race condition and still reject any
    # file with other data.
    if magic.busy > 1 or magic.incomplete > 1:
      raise errors.UnableToParseFile(u'Not a valid Java IDX file')

    # Field magic.idx_version is the file version, of which only
    # certain versions are supported.
    if magic.idx_version not in self._SUPPORTED_VERSIONS:
      raise errors.UnableToParseFile(u'Not a valid Java IDX file')

    # Add ourselves to the parser chain, which will be used in all subsequent
    # event creation in this parser.
    parser_chain = self._BuildParserChain(parser_chain)

    # A truncated or damaged file is reported the same way as a bad header
    # instead of leaking the low-level read error to the caller.
    try:
      section_one, url, ip_address, http_header_count = self._ReadSections(
          file_object, magic.idx_version)
      download_date = self._ReadDownloadDate(file_object, http_header_count)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(read_error.format(exception))

    if not url or not ip_address:
      raise errors.UnableToParseFile(
          u'Unexpected Error: URL or IP address not found in file.')

    # The last modified date denotes when the file was last modified on
    # the HOST. For example, when the file was uploaded to a web server.
    last_modified_timestamp = timelib.Timestamp.FromJavaTime(
        section_one.last_modified_date)
    # TODO: Move the timestamp description fields into eventdata.
    event_object = JavaIDXEvent(
        last_modified_timestamp, 'File Hosted Date', magic.idx_version, url,
        ip_address)
    parser_context.ProduceEvent(
        event_object, parser_chain=parser_chain, file_entry=file_entry)

    # An expiration date of 0 means no expiration date was set.
    expiration_date = section_one.expiration_date
    if expiration_date:
      expiration_timestamp = timelib.Timestamp.FromJavaTime(expiration_date)
      event_object = JavaIDXEvent(
          expiration_timestamp, 'File Expiration Date', magic.idx_version,
          url, ip_address)
      parser_context.ProduceEvent(
          event_object, parser_chain=parser_chain, file_entry=file_entry)

    if download_date:
      event_object = JavaIDXEvent(
          download_date, eventdata.EventTimestamp.FILE_DOWNLOADED,
          magic.idx_version, url, ip_address)
      parser_context.ProduceEvent(
          event_object, parser_chain=parser_chain, file_entry=file_entry)

  def _ReadSections(self, file_object, idx_version):
    """Reads the version specific data sections that follow the header.

    Args:
      file_object: A file-like object positioned directly after the
                   IDX_SHORT_STRUCT header.
      idx_version: The IDX version read from the header: 602, 603, 604
                   or 605.

    Returns:
      A tuple of (section_one, url, ip_address, http_header_count) where
      section_one is the parsed structure that holds the last modified
      and expiration dates. On return the file offset is just prior to
      the HTTP headers, if there are any.
    """
    if idx_version == 602:
      section_one = self.IDX_602_STRUCT.parse_stream(file_object)
      # The 6.02 structure has no IP address field.
      return section_one, section_one.url, 'Unknown', section_one.FieldCount

    # IDX 6.03 and 6.04 have two unused bytes before the structure.
    if idx_version in (603, 604):
      file_object.read(2)

    # IDX 6.03, 6.04, and 6.05 files use the same structures for the
    # remaining data.
    section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(file_object)
    if file_object.get_size() <= 128:
      # The file ends before section 2, hence there is nothing more to read.
      return section_one, 'Unknown', 'Unknown', 0

    file_object.seek(128)  # Static offset for section 2.
    section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(file_object)
    return (
        section_one, section_two.url, section_two.ip_address,
        section_two.FieldCount)

  def _ReadDownloadDate(self, file_object, http_header_count):
    """Reads the HTTP headers and determines the download date.

    Args:
      file_object: A file-like object positioned at the first HTTP header.
      http_header_count: The number of name and value pairs to read.

    Returns:
      The timestamp of the last 'date' header or None when there is no
      such header.
    """
    download_date = None

    # The count comes straight from the file, so count down instead of
    # using range(), which builds a list of that size on Python 2.
    remaining = http_header_count
    while remaining > 0:
      remaining -= 1
      header_name = self.JAVA_READUTF_STRING.parse_stream(file_object)
      header_value = self.JAVA_READUTF_STRING.parse_stream(file_object)
      if header_name.string == 'date':
        # Time string "should" be in UTC or have an associated time zone
        # information in the string itself. If that is not the case then
        # there is no reliable method for plaso to determine the proper
        # timezone, so the assumption is that it is UTC.
        download_date = timelib.Timestamp.FromTimeString(
            header_value.string, gmt_as_timezone=False)

    return download_date
|
|
|
|
|
|
# Register the Java IDX parser with the parsers manager when this module
# is imported.
manager.ParsersManager.RegisterParser(JavaIDXParser)
|